This file explains all data processing and analysis steps for *Using consumption and reward simulations to increase the appeal of plant-based foods*. The R Project has a private library to make all steps computationally reproducible; I use the renv package for this. That means you will need to install the renv package and run the `renv::restore()` command (see instructions here). This works best if you don’t have the library and staging folders inside the renv folder.

# pacman makes it easier to load and install packages;
# install it first if it is not already in the (renv) library
if (!requireNamespace("pacman"))
  install.packages("pacman")

library(pacman)

# load (and, if missing, install) all packages used in this script
p_load(
  knitr,
  MASS,
  Matrix,
  mgcv,
  Rmisc,
  here,
  DT,
  cowplot,
  pastecs,
  lme4,
  afex,
  lsmeans,
  MuMIn,
  tidyverse
)

# set seed so that jittered plots and any sampling are reproducible
set.seed(42)

# set the default theme for all ggplot output
theme_set(theme_cowplot())

Below the custom functions I’ll be using throughout the script.

### FUNCTION 1 ###
# converts a height given in feet and inches to whole centimetres
# (1 foot = 30.48 cm, 1 inch = 2.54 cm; result is rounded to 0 decimals)
feet_inches_to_cm <- 
  function(feet, inches){
    total_cm <- feet * 30.48 + inches * 2.54
    round(total_cm, digits = 0)
  }

### FUNCTION 2 ###
# function that transforms stones and pounds to kg
# `stones` is optional: when omitted (or NULL), `pounds` is treated as the
# full weight in pounds (1 stone = 6.35029 kg, 1 pound = 0.453592 kg);
# the result is rounded to whole kilograms
stones_pounds_to_kg <-
  function(pounds, stones=NULL){
    # use is.null() rather than missing(): with the NULL default, an
    # explicit call stones_pounds_to_kg(x, stones = NULL) previously fell
    # into the else branch and returned numeric(0)
    if(is.null(stones)){
      kg <- pounds * 0.453592
    }
    else{
      kg <- stones * 6.35029 + pounds * 0.453592
    }
    round(kg, digits = 0)
  }

### FUNCTION 3 ###
# function that gives summary statistics and a density plot, with different
# handling for repeated vs. trait-like (one value per participant) measures
describe_visualize <- 
  function(df, 
           variable, 
           repeated_measure = FALSE,
           want_summary = FALSE){
    
    variable <- enquo(variable)
    
    # trait-like measures: collapse to one row per participant first;
    # repeated measures are described on all rows as-is
    if (repeated_measure == FALSE){
      df <-
        df %>%
        group_by(pp) %>%
        slice(1) %>% 
        ungroup()
    }
    
    # descriptive statistics of the selected variable
    sum_stats <-
      df %>% 
      pull(!! variable) %>% 
      stat.desc()
    
    # density plot with the raw observations marked at y = 0
    density_plot <-
      ggplot(df, aes(x = !! variable)) +
        geom_density(color = "darkgrey", fill = "darkgrey") +
        geom_point(aes(x = !! variable, y = 0))
    
    # return the plot alone, or a list of summary table plus plot
    if(want_summary == TRUE){
      list(kable(sum_stats), density_plot)
    } else{
      density_plot
    }
  }

### FUNCTION 4 ###
# function that returns a frequency table for trait-like categorical
# variables (counts each participant once)
my_table <-
  function(df, variable){
    variable <- enquo(variable)
    
    # keep the first row per participant, then tabulate the chosen column
    one_per_pp <-
      df %>% 
      group_by(pp) %>% 
      slice(1)
    
    one_per_pp %>% 
      pull(!! variable) %>% 
      table()
  }

### FUNCTION 5 ###
# raincloud plot function from https://github.com/RainCloudPlots/RainCloudPlots/blob/master/tutorial_R/R_rainclouds.R
# Defining the geom_flat_violin function ----
# Note: the below code modifies the
# existing github page by removing a parenthesis in line 50

"%||%" <- function(a, b) {
  if (!is.null(a)) a else b
}

# Layer constructor for the half ("flat") violin: computes a density per
# group (stat = "ydensity") and renders it with the GeomFlatViolin ggproto
# object defined below. Arguments mirror ggplot2::geom_violin().
geom_flat_violin <- function(mapping = NULL, data = NULL, stat = "ydensity",
                             position = "dodge", trim = TRUE, scale = "area",
                             show.legend = NA, inherit.aes = TRUE, ...) {
  layer(
    data = data,
    mapping = mapping,
    stat = stat,
    geom = GeomFlatViolin,
    position = position,
    show.legend = show.legend,
    inherit.aes = inherit.aes,
    # trim/scale and any extra arguments are forwarded to the stat/geom
    params = list(
      trim = trim,
      scale = scale,
      ...
    )
  )
}

#' @rdname ggplot2-ggproto
#' @format NULL
#' @usage NULL
#' @export
GeomFlatViolin <-
  ggproto("GeomFlatViolin", Geom,
    # Compute each half-violin's bounding box before drawing.
    setup_data = function(data, params) {
      # width falls back on the param, then on 90% of the x-resolution
      data$width <- data$width %||%
        params$width %||% (resolution(data$x, FALSE) * 0.9)

      # ymin, ymax, xmin, and xmax define the bounding rectangle for each group;
      # the violin extends only to the right of x (from x to x + width / 2)
      data %>%
        group_by(group) %>%
        mutate(
          ymin = min(y),
          ymax = max(y),
          xmin = x,
          xmax = x + width / 2
        )
    },

    draw_group = function(data, panel_scales, coord) {
      # Find the points for the line to go all the way around:
      # flat edge at x, curved edge at x + violinwidth * (xmax - x)
      data <- transform(data,
        xminv = x,
        xmaxv = x + violinwidth * (xmax - x)
      )

      # Make sure it's sorted properly to draw the outline
      # (up the flat edge, then back down the curved edge)
      newdata <- rbind(
        plyr::arrange(transform(data, x = xminv), y),
        plyr::arrange(transform(data, x = xmaxv), -y)
      )

      # Close the polygon: set first and last point the same
      # Needed for coord_polar and such
      newdata <- rbind(newdata, newdata[1, ])

      # NOTE(review): ggplot2:::ggname is an unexported internal and may
      # change between ggplot2 versions — confirm against the renv-pinned version
      ggplot2:::ggname("geom_flat_violin", GeomPolygon$draw_panel(newdata, panel_scales, coord))
    },

    draw_key = draw_key_polygon,

    # defaults matching geom_violin's look
    default_aes = aes(
      weight = 1, colour = "grey20", fill = "white", size = 0.5,
      alpha = NA, linetype = "solid"
    ),

    required_aes = c("x", "y")
  )

### FUNCTION 6 ###
# creates raincloud plots (flat violin + jittered raw points) for one
# measure (attractiveness or simulations)
rc_plot <-
  function(
    df,
    measurement,  # value of the `measure` column to keep
    variable,     # column name (string) holding the ratings
    group,        # column name (string) mapped to x, fill, and color
    xlab,         # x-axis label (shown on the y side after coord_flip)
    title,        # plot title
    facet = NULL  # optional faceting variable passed to facet_wrap()
  ) {
    rc_plot <-
    ggplot(
      df %>%
        filter(measure == measurement),
      # aes_string() is deprecated; the .data pronoun handles the
      # string-valued column names with the same result
      aes(
        x = .data[[group]],
        y = .data[[variable]],
        fill = .data[[group]],
        color = .data[[group]]
      )
    ) +
      geom_flat_violin(position = position_nudge(x = .2,
                                                 y = 0),
                       adjust = 2) +
      geom_point(position = position_jitter(width = .15),
                 size = .10) +
      ylab("Rating (0-100)") +
      xlab(xlab) +
      coord_flip() +
      # guides(... = FALSE) is deprecated; "none" is the supported value
      guides(fill = "none",
             color = "none") +
      scale_color_brewer(palette = "Dark2") +
      scale_fill_brewer(palette = "Dark2") +
      ggtitle(title)
    
    # optionally split the plot by an additional variable
    if(!is.null(facet)){
      rc_plot <-
      rc_plot +
        facet_wrap(facet)
    }
    
    return(rc_plot)
  }

### FUNCTION 7 ###
# creates line plots with error bars (expects an `se` column in df)
line_plot <-
  function(
    df,
    x_group,   # column name given as a string
    measure,   # column given unquoted (captured with tidy evaluation)
    group_by   # column name given as a string
  ) {
    
    measure <- enquo(measure)
    
    # the original passed the quosure `measure` into aes_string(), which
    # expects character strings; use aes() with the .data pronoun for the
    # string arguments and !! for the quoted one, evaluated consistently
    ggplot(df,
           aes(
             x = .data[[x_group]],
             y = !! measure,
             group = .data[[group_by]],
             color = .data[[group_by]],
             linetype = .data[[group_by]]
           )
    ) +
      geom_line() +
      geom_point() +
      geom_errorbar(
        aes(
          ymin = !! measure - se,
          ymax = !! measure + se),
        width = .05) +
      scale_color_brewer(palette = "Dark2")
  }

### FUNCTION 8 ###
# function that calculates the proportion of residuals above a cut-off for an lmer object
proportion_residuals <-
  function(model, cut_off){
    
    # scale (standardize) the model residuals
    model_resid <- resid(model, scaled = TRUE)
    
    # take the absolute value so both tails count
    model_resid <- abs(model_resid)
    
    # count those ABOVE cut_off and divide by the total number of residuals
    # (i.e., the proportion of |scaled residuals| exceeding the cut-off)
    proportion <- sum(model_resid > cut_off) / length(model_resid)
    
    return(proportion)
  }

### FUNCTION 9 ###
# shows the proportions from proportion_residuals() in a table,
# comparing them against conventional normal-distribution expectations
lmer_residuals <-
  function(lmer_model){
    
    # cut-offs on the standardized scale and the proportions expected
    # beyond them under approximate normality
    cutoffs <- c(2, 2.5, 3)
    expected <- c(.05, .01, .0001)
    
    # observed proportion of |scaled residuals| beyond each cut-off
    observed <- vapply(
      cutoffs,
      function(co) proportion_residuals(lmer_model, co),
      numeric(1)
    )
    
    # assemble the comparison table and flag whether each observed
    # proportion stays below its expected cut-off
    resid_proportions <-
      tibble(
        standardized_cutoff = cutoffs,
        proportion_cutoff = expected,
        proportion = observed,
        below_cutoff = proportion < proportion_cutoff
      )
    
    print(resid_proportions)
  }

### FUNCTION 10 ###
# function that takes the formal outlier values on Cook's distance from an
# lmer influence object and shows the six highest values
arrange_cook <-
  function(outliers, grouping_factor){
    # compute the distances, label the rows by the grouping factor,
    # then sort descending and keep the top six
    distances <- cooks.distance(outliers)
    
    distances %>% 
      as_tibble(rownames = as.character(grouping_factor)) %>%
      rename(cooks_distance = V1) %>% 
      arrange(desc(cooks_distance)) %>% 
      head()
  }

1. Load and wrangle data

1.1 Load data

First, we load the data. For a codebook, see the [enter file name] file. Note that the Qualtrics output format is quite the nightmare for importing; found a solution here.

# read only the first line of the export: it holds the real variable names
headers <- read_csv(
  here("data", "study3", "raw_data.csv"),
  col_names = FALSE,
  n_max = 1) %>%
  as.character() # variable names stored in a vector

# read the data proper, skipping the three Qualtrics meta-data rows and
# applying the header vector as column names
raw_data <- read_csv(
  here("data", "study3", "raw_data.csv"),
  col_names = headers, # use that name vector here to name variables
  na = "",
  skip = 3 # those are the three rows that contain the variable info
  )

rm(headers) # clear names from workspace

The data frame is extremely wide; we have 854 variables. This has several reasons:

  • Qualtrics data are generally in the wide format, so each stimulus gets a column rather than a row
  • What label type was assigned to which food type for attractiveness ratings was counterbalanced, such that there were two sets, each containing 40 variables (= 80 total)
  • Participants only answered one set, but both sets are contained in the data, meaning that participants will have entries for one set of variables, but only NA for the other set
  • Each stimulus was timed, meaning Qualtrics measured four additional variables per stimulus: first click, last click, number of clicks, and after how much time the page was submitted
  • This means in addition to the 80 stimulus variables, there are 80 (the two sets) x 4 (timing variables) = 320 variables just for attractiveness ratings (total 400 attractiveness variables)
  • This entire procedure also applies to simulation ratings, doubling the number of stimulus rating variables (800 total rating variables)
  • The remaining 54 are demographic etc. variables

1.2 Wrangling

We need to get these data into the long format. Before that, let’s only keep those variables we need and simultaneously give them proper names. Also, the response_id already is a unique identifier, but not easy to read. I’ll add a participant number (pp). Last, because I’m cleaning the raw data, I’ll use a new data object: working_file

# keep only the variables we need and rename them on the fly
# (Qualtrics question codes -> readable names); add a participant number
working_file <- 
  raw_data %>% 
  select(
    start_date = StartDate,
    end_date = EndDate,
    duration = `Duration (in seconds)`,
    finished = Finished,
    response_id = ResponseId,
    fullfil_criteria = Q3,
    consent = Q8,
    prolific_id = Q9,
    group = Group,
    hungry = Q11_1,
    thirsty = Q11_2,
    desire_instructions_time = `Q13_Page Submit`,
    pb_1_c_attr_1:`mb_20_enh_attr_time_Click Count`, # all attractiveness variables
    simulations_instructions_time = `Q41_Page Submit`,
    pb_1_c_sim_1:`mb_20_enh_sim_time_Click Count`, # all simulation variables
    age = Q19,
    gender = Q20,
    height_choice = Q21,
    height_cm = Q22_1,
    height_feet = Q23_1,
    height_inches = Q23_2,
    weight_choice = Q24,
    weight_kilos = Q25_1,
    weight_stones = Q26_1,
    weight_pounds = Q26_2,
    weight_pounds_only = Q27_1,
    diet = Q28,
    diet_details = Q29,
    meat_per_week = Q30,
    change_diet = Q31_1,
    attitude_meat = attitude_meat_1,
    attitude_vegan = attitude_vegan_1,
    attitude_pb = attitude_pb_1,
    allergies = Q32,
    allergies_details = Q33,
    language_issues = Q34,
    language_issues_details = Q35,
    didnt_like = Q36,
    study_about = Q37,
    technical_issues = Q38,
    -contains("Click"), # drop the click-count timing variables picked up by the ranges above
  ) %>%
  mutate(pp = paste0("pp_", row_number())) %>%
  select(# add participant number and set it as first column
    pp,
    everything()
  )

Next, I assign the correct variable type. In addition, I exported the Qualtrics data with numbers as output, so I will reintroduce the factor levels for factor variables.

# convert identifier and categorical columns to factors, then reintroduce
# the factor labels (Qualtrics exported numeric codes);
# mutate_at()/vars() are superseded — across() is the current dplyr idiom
working_file <-
  working_file %>% 
  mutate(
    across(
      c(
        pp,
        finished,
        response_id:group,
        gender,
        height_choice,
        weight_choice,
        diet,
        allergies,
        language_issues
      ),
      as.factor
    )
  ) %>% 
  # variable-specific codings
  mutate(
    finished = fct_recode(
      finished,
      no = "0",
      yes = "1"
    ),
    gender = fct_recode(
      gender,
      male = "1",
      female = "2",
      other = "3"
    ),
    diet = fct_recode(
      diet,
      omnivore = "1",
      pescatarian = "2",
      vegetarian = "3",
      vegan = "4",
      other = "5"
    ),
    height_choice = fct_recode(
      height_choice,
      cm = "1",
      feet_inches = "2"
    ),
    weight_choice = fct_recode(
      weight_choice,
      kg = "1",
      stones_pounds = "2",
      pounds_only = "3"
    )
  ) %>% 
  # these four share the same yes/no coding
  mutate(
    across(
      c(
        fullfil_criteria,
        consent,
        allergies,
        language_issues
      ),
      ~ fct_recode(
        .,
        yes = "1",
        no = "2"
      )
    )
  )

Upon inspection, I saw that meat eating frequency (meat_per_week) has two non-numerical entries: 1/2 and less than once. I don’t want to remove the latter, so I’ll convert both to a number (0.5).

working_file <-
  working_file %>%
  mutate(
    # map the two non-numerical entries to 0.5, everything else to numeric
    # NOTE(review): case_when() evaluates every RHS, so as.numeric() will
    # emit a coercion warning for the two text entries even though that
    # result is discarded — harmless here, but confirm it is expected
    meat_per_week = case_when(
      meat_per_week %in% c("1/2", "less than once") ~ 0.5,
      TRUE ~ as.numeric(meat_per_week)
    )
  )

Similarly, there are text entries in weight_kilos and weight_pounds. In weight_kilos there’s a text entry explaining that the respondent doesn’t know their weight, so we’ll set that to NA. In weight_pounds, the respondent indicated stones, but "N/A" for pounds, so we’ll set that to 0, assuming the respondent weighed around the indicated weight in stones. We’ll do the same for all those who only provided stones, but no pounds.

working_file <- 
  working_file %>% 
  mutate(
    # one respondent wrote free text instead of a number: set to NA
    # (the string must match the raw entry exactly, typos included)
    weight_kilos = as.numeric(
      na_if(weight_kilos, "I don't lnow unfortuantely and I do not have acces to a set of scales qhilw filling out this survey. I applogise for this, but I was not informed I would need this prior to starting the survey.")
    ),
    # one respondent typed "N/A" for pounds: set to NA first ...
    weight_pounds = as.numeric(
      na_if(weight_pounds, "N/A")
    ),
    # ... then treat anyone who gave stones but no pounds as 0 extra pounds
    weight_pounds = case_when(
      weight_choice == "stones_pounds" & is.na(weight_pounds) ~ 0,
      TRUE ~ weight_pounds
    )
  )

Because the study was conducted in the UK, we also need to transform height and weight to cm and kg, respectively.

working_file <-
  working_file %>% 
  mutate(
    # unify height in cm, converting feet/inches with the helper from above
    height_cm = case_when(
      height_choice == "cm" ~ height_cm,
      height_choice == "feet_inches" ~ feet_inches_to_cm(height_feet, height_inches)
    ),
    # unify weight in kg; stones_pounds_to_kg() also handles pounds-only
    # input via its optional `stones` argument
    weight_kilos = case_when(
      weight_choice == "kg" ~ weight_kilos,
      weight_choice == "stones_pounds" ~ stones_pounds_to_kg(weight_pounds, weight_stones),
      weight_choice == "pounds_only" ~ stones_pounds_to_kg(weight_pounds_only) 
    )
  )

Now that the data are cleaned, I can finally transform them to the long format. Currently, the measurements of attractiveness and simulations are in the following format: pb_1_c_attr_1. The components (separated by underscores) of that format mean:

  • food type: pb (plant-based) or mb (meat-based)
  • stimulus number
  • label type: c (control) or enh (enhanced)
  • measurement/DV: attr (attractiveness) or sim (simulations)
  • meaningless Qualtrics appendix: always _1

Let’s remove the _1 at the end of those variable names, and also remove all Page Submit appendices from their variable names.

working_file <-
  working_file %>% 
  # strip the meaningless "_1" Qualtrics suffix (the last two characters)
  # from all stimulus variable names
  rename_at(
    vars(
      ends_with("_1"),
    ),
    list(
      ~ str_sub(
        ., 1, str_length(.)-2
      )
    )
  ) %>% 
  # strip the "_Page Submit" suffix from the timing variables
  rename_at(
    vars(
      ends_with("Page Submit")
    ),
    list(
      ~ str_replace(
        ., "_Page Submit", ""
      )
    )
  )

1.3 Tidy data

Let’s get to turning the data into the long format. Because we used two sets of stimuli (counterbalancing what stimulus belongs to what food type and label type), half of the participants gave responses to half of the measurement variables; the other half of participants gave responses to the other half.

Therefore, participants will have missing values on those variables that belonged to the other set (i.e., the other counterbalancing condition). Luckily, the pivot_longer function is amazing, so we can drop those measurements per participants with the values_drop_na argument. This command has the nice side effect of also excluding those who did not consent to participate (because they have NAs everywhere.)

working_file <- 
  working_file %>% 
  pivot_longer(
    cols = c(contains("pb_"), contains("mb_")),
    names_to = c( # specify the variables to be formed from the current variable names
      "food_type", 
      "stimulus_no",
      "label_type",
      "measure"
      ),
    # NOTE(review): a two-element values_to without a ".value" sentinel in
    # names_to is unusual for pivot_longer() — confirm this runs under the
    # tidyr version pinned in the renv lockfile
    values_to = c( # the values, which will be measurement type (attractiveness vs. simulations) and the timer per variable
      "rating", 
      "time"
      ),
    names_sep = "_",
    values_drop_na = TRUE # do not include empty entries due to counterbalancing as rows in the long format
  ) %>%
  
  # give proper variable names, labels, and variable types
  mutate(
    stimulus_no = as.numeric(stimulus_no)
  ) %>% 
  mutate(
    food_type = fct_recode(
      as.factor(food_type),
      meat_based = "mb",
      plant_based = "pb",
    ),
    label_type = fct_recode(
      as.factor(label_type),
      control = "c",
      enhanced = "enh"
    ),
    measure = fct_recode(
      as.factor(measure),
      attractiveness = "attr",
      simulations = "sim"
    )
  ) %>% # arbitrary, but I like to order the variables roughly in the order they were collected
  select(
    pp:simulations_instructions_time,
    food_type:time,
    everything()
  )

Alright, the data are in a tidy format. Below, I show the data of a random participant for illustration.

2. Exclusions

2.1 Exclude test runs

There are still a couple of test runs left (three in total). They can be identified because they do not follow the Prolific ID scheme (valid Prolific IDs start with a 5 and are longer than 20 characters; the test-run entries are shorter). We exclude them here.

# before excluding testruns
length(unique(working_file$pp))
## [1] 178
# keep only rows whose ID is longer than 20 characters
# (the three test runs have shorter, non-Prolific IDs)
working_file <- 
  working_file %>% 
  filter(
    str_length(
      as.character(prolific_id)
      ) > 20
    )

# after exclusion
length(unique(working_file$pp))
## [1] 175

Now exclude those who didn’t finish the survey.

# sample size before excluding unfinished surveys
length(unique(working_file$pp))
## [1] 175
working_file <-
  working_file %>% 
  filter(finished == "yes")

# one participant is excluded here
length(unique(working_file$pp))
## [1] 174

Out of experience, Qualtrics isn’t very good at determining whether a survey is finished or not. We double check this: If indeed all participants gave all ratings, there should be 80 for each of the remaining 174 participants: 40 attractiveness ratings and 40 simulations ratings. Furthermore, there should be few NAs remaining on any of the ratings.

There are three cases where participants did not report a rating. I double checked that in the raw data file and that’s indeed the case. Respondents were not forced to give a response (Qualtrics option was “Request Response”), so participants simply did not respond to those three items.

# does each participant have 80 ratings (i.e., rows)?
working_file %>% 
  group_by(pp) %>% 
  count() %>%
  ungroup() %>% 
  summarize(
    all_there = sum(n == 80)
  )
## # A tibble: 1 x 1
##   all_there
##       <int>
## 1       174
# compare to sample
length(unique(working_file$pp))
## [1] 174
# are there NAs left on the rating or timing variable?
working_file %>% 
  select(rating, time) %>% 
  summarize_all(
    list(~ sum(is.na(.)))
  )
## # A tibble: 1 x 2
##   rating  time
##    <int> <int>
## 1      3     0
working_file %>% 
  filter(is.na(rating)) %>% 
  DT::datatable(
  .,
  options = list(
    autoWidth = TRUE,
    scrollY = TRUE,
    scrollX = TRUE, 
    pageLength = 3
  )
  ) 

2.2 Not fulfilling inclusion criteria

There was a filter question at the beginning of the study asking participants whether they fulfill the inclusion criteria. All participants indicated they fulfilled the criteria, so no need to exclude anyone.

# one row per participant, then tabulate the inclusion-criteria question
working_file %>% 
  group_by(pp) %>% 
  slice(1) %>% 
  pull(fullfil_criteria) %>% 
  table()
## .
## yes  no 
## 174   0

2.3 Identify rushed responses

In the preregistration, we specified data quality checks: participants are excluded if they do not comply with the experimental instructions or give (almost) exactly the same response on each trial (i.e., within 1 of 100 scale points).

To identify possibly rushed responses, we inspect the variation of ratings and compare it to how quickly participants responded.

The variation looks fine, nobody just clicked the same point of the scale repeatedly. One participant was extremely fast, spending about 1.5 seconds per rating, possibly hinting at randomly clicking through.

# lowest variation of the ratings (per-participant mean and SD, sorted ascending by SD)
working_file %>% 
  group_by(pp) %>% 
  summarize(
    mean = (mean(rating, na.rm = TRUE)),
    sd = sd(rating, na.rm = TRUE)
  ) %>% 
  arrange(sd)
## # A tibble: 174 x 3
##    pp      mean    sd
##    <fct>  <dbl> <dbl>
##  1 pp_139  42.1  10.1
##  2 pp_47   53.2  12.2
##  3 pp_137  58.1  14.3
##  4 pp_49   82.8  14.6
##  5 pp_141  53.6  14.8
##  6 pp_140  56.4  15.4
##  7 pp_160  59.3  15.8
##  8 pp_112  55.1  16.1
##  9 pp_125  62.5  16.2
## 10 pp_30   55.3  16.4
## # ... with 164 more rows
# check response times: mean time per page for each participant
time_means <-
  working_file %>% 
  group_by(pp) %>% 
  summarize(
    time_mean = mean(time)
  ) %>% 
  arrange(time_mean)

# inspect the fastest times
head(time_means, n = 10)
## # A tibble: 10 x 2
##    pp     time_mean
##    <fct>      <dbl>
##  1 pp_81       1.54
##  2 pp_69       2.40
##  3 pp_82       2.65
##  4 pp_54       2.92
##  5 pp_117      3.19
##  6 pp_64       3.23
##  7 pp_120      3.25
##  8 pp_63       3.38
##  9 pp_137      3.39
## 10 pp_85       3.42
# compare to median time across all pages
median(working_file$time)
## [1] 4.697
# plot the distribution of mean page times
time_means %>% 
  ggplot(aes(x = time_mean)) +
  geom_density(color = "darkgrey", fill = "darkgrey") +
  geom_point(aes(x = time_mean, y = 0))

Note: This is not preregistered. It’s pretty hard to tell what mean time on answering items indicates inattentiveness. However, we can at least set those mean times into relation with the sample median time. Leiner has put forward a useful way to identify meaningless data in internet studies, validated with real data.

The method is relatively straightforward: The Relative Speed Index (RSI) gives an impression of how fast participants responded to items without giving too much weight to quick or slow responses on individual stimuli. An RSI of > 1.75 is considered an indicator of meaningless data (actually, it might even be conservative).

The RSI is computed as follows:

  • Compute sample’s median completion time for each page
  • Divide those sample completion times by the individual respondents’ page completion time (i.e., speed factors)
  • Trim these speed factors to an interval of [0|3]
  • Average the trimmed speed factors per participant to obtain the RSI
working_file <-
  working_file %>% 
  
  # calculate median time for each stimulus (aka page)
  group_by(
    food_type,
    stimulus_no,
    label_type,
    measure
  ) %>% 
  summarize(
    page_time_median = median(time)
  ) %>% 
  
  # add those median page times to the data set and match with the stimuli there
  # (the summarized table enters as the RHS `.` of the join; working_file
  # itself is the LHS, so all original rows are kept)
  left_join(
    working_file,
    .,
    by = c(
      "food_type",
      "stimulus_no",
      "label_type",
      "measure"
    )
  ) %>% 
  
  # now divide the median page completion time by the individual page completion time (so per row)
  mutate(
    speed_factor = page_time_median / time
  ) %>% 
  
  # trim to [0|3]: dividing by zero will yield a speed factor of Inf, anything else a higher number
  mutate(
    speed_factor_trimmed = case_when(
      speed_factor == Inf ~ 0,
      speed_factor > 3 ~ 3,
      TRUE ~ speed_factor
    )
  ) %>% 
  
  # average those trimmed speed factors to obtain the RSI (one value per participant)
  group_by(pp) %>% 
  mutate(
    rsi = mean(speed_factor_trimmed)
  ) %>% 
  ungroup()

Let’s inspect the RSI. Indeed, there is at least one massive outlier, and several RSIs > 1.75. If we inspect mean completion time side by side with RSI, we see that several of those participants we suspected rushed through also come up here. We thus exclude those participants with an RSI > 1.75. Note: this decision was not preregistered, but we logged it in a decision log file.

I’ll run the analyses with and without these 8 cases, so I’ll put their data into a separate object.

# let's plot them (one RSI value per participant)
working_file %>% 
  group_by(pp) %>% 
  slice(1) %>% 
  ggplot(aes(x = rsi)) +
  geom_density() +
  geom_point(aes(x = rsi, y = 0))

# compare with raw mean time per page
working_file %>% 
  group_by(pp) %>% 
  slice(1) %>% 
  arrange(desc(rsi)) %>%
  select(pp, rsi) %>% 
  left_join(., 
            time_means,
            by = "pp"
  )
## # A tibble: 174 x 3
## # Groups:   pp [174]
##    pp       rsi time_mean
##    <fct>  <dbl>     <dbl>
##  1 pp_81   2.75      1.54
##  2 pp_116  2.18      4.02
##  3 pp_69   2.13      2.40
##  4 pp_82   2.12      2.65
##  5 pp_54   2.00      2.92
##  6 pp_118  1.97      5.24
##  7 pp_64   1.94      3.23
##  8 pp_125  1.77      3.71
##  9 pp_128  1.75      3.66
## 10 pp_137  1.74      3.39
## # ... with 164 more rows
# how many do we exclude?
length(unique((working_file$pp))) - nrow(working_file %>% filter(rsi <= 1.75) %>% group_by(pp) %>% slice(1))
## [1] 8
# create a separate file for those cases that we add when we test the robustness of the analysis
high_rsi <-
  working_file %>% 
  filter(rsi > 1.75)

# remove them from working file
working_file <-
  working_file %>% 
  filter(rsi <= 1.75)

3. Describe and visualize

3.1 Meta-data

How long did participants spend on the survey? Median duration is around ~ 14 minutes.

# survey duration (trait-like: one value per participant), with summary table
describe_visualize(
  working_file,
  duration,
  FALSE,
  TRUE
)
## [[1]]
## 
## 
##                            x
## -------------  -------------
## nbr.val         1.660000e+02
## nbr.null        0.000000e+00
## nbr.na          0.000000e+00
## min             4.440000e+02
## max             2.513000e+03
## range           2.069000e+03
## sum             1.549810e+05
## median          8.460000e+02
## mean            9.336205e+02
## SE.mean         2.846142e+01
## CI.mean.0.95    5.619552e+01
## var             1.344687e+05
## std.dev         3.666997e+02
## coef.var        3.927717e-01
## 
## [[2]]

How long did they spend on the instructions for the attractiveness and simulations, respectively? Overall not long (~ 10 seconds), but some apparently left the browser window open for quite some time.

# time spent on the attractiveness (desire) instructions
describe_visualize(
  working_file,
  desire_instructions_time,
  FALSE,
  TRUE
)
## [[1]]
## 
## 
##                            x
## -------------  -------------
## nbr.val          166.0000000
## nbr.null           0.0000000
## nbr.na             0.0000000
## min                0.8990000
## max               53.7550000
## range             52.8560000
## sum             1854.5210000
## median             8.7835000
## mean              11.1718133
## SE.mean            0.6508350
## CI.mean.0.95       1.2850383
## var               70.3153031
## std.dev            8.3854221
## coef.var           0.7505874
## 
## [[2]]

# time spent on the simulations instructions
describe_visualize(
  working_file,
  simulations_instructions_time,
  FALSE,
  TRUE
)
## [[1]]
## 
## 
##                           x
## -------------  ------------
## nbr.val          166.000000
## nbr.null           0.000000
## nbr.na             0.000000
## min                1.064000
## max              368.953000
## range            367.889000
## sum             2943.293000
## median            11.281500
## mean              17.730681
## SE.mean            2.776208
## CI.mean.0.95       5.481473
## var             1279.417335
## std.dev           35.768944
## coef.var           2.017348
## 
## [[2]]

Were there language issues? Barely, and the qualitative answers don’t give cause for concern.

# counts of reported language issues (one row per participant)
my_table(working_file, language_issues)
## .
## yes  no 
##   2 164
# inspect the free-text details of the two "yes" answers
working_file %>% 
  group_by(pp) %>% 
  slice(1) %>% 
  ungroup() %>% 
  filter(language_issues == "yes") %>% 
  pull(language_issues_details)
## [1] "I didn't know the meaning of a few words" 
## [2] "a couple words I was unsure of, like ragu"

3.2 Demographic information

First, I look at hunger and thirst ratings. Participants seemed to be more thirsty than hungry.

# hunger ratings (0-100)
describe_visualize(
  working_file,
  hungry,
  FALSE,
  TRUE
)
## [[1]]
## 
## 
##                            x
## -------------  -------------
## nbr.val          166.0000000
## nbr.null          29.0000000
## nbr.na             0.0000000
## min                0.0000000
## max              100.0000000
## range            100.0000000
## sum             4744.0000000
## median            23.0000000
## mean              28.5783133
## SE.mean            1.9521333
## CI.mean.0.95       3.8543811
## var              632.5968602
## std.dev           25.1514783
## coef.var           0.8800897
## 
## [[2]]

# thirst ratings (0-100)
describe_visualize(
  working_file,
  thirsty,
  FALSE,
  TRUE
)
## [[1]]
## 
## 
##                           x
## -------------  ------------
## nbr.val          166.000000
## nbr.null          10.000000
## nbr.na             0.000000
## min                0.000000
## max              100.000000
## range            100.000000
## sum             7564.000000
## median            51.000000
## mean              45.566265
## SE.mean            1.963437
## CI.mean.0.95       3.876699
## var              639.944067
## std.dev           25.297116
## coef.var           0.555172
## 
## [[2]]

What’s the age range in our sample? One participant indicated to be 259 years old. Probably a typo, but not sure whether it’s supposed to be 25 or 59, so I set that value to NA.

# age before cleaning (note the max of 259)
describe_visualize(
  working_file,
  age,
  FALSE,
  TRUE
)
## [[1]]
## 
## 
##                           x
## -------------  ------------
## nbr.val          166.000000
## nbr.null           0.000000
## nbr.na             0.000000
## min               18.000000
## max              259.000000
## range            241.000000
## sum             5353.000000
## median            30.000000
## mean              32.246988
## SE.mean            1.581207
## CI.mean.0.95       3.122006
## var              415.035597
## std.dev           20.372422
## coef.var           0.631762
## 
## [[2]]

# set the impossible age (259) to missing
working_file <-
  working_file %>% 
  mutate(
    age = if_else(age == 259, NA_real_, age)
  )

# age after cleaning
describe_visualize(
  working_file,
  age,
  FALSE,
  TRUE
)
## [[1]]
## 
## 
##                            x
## -------------  -------------
## nbr.val          165.0000000
## nbr.null           0.0000000
## nbr.na             1.0000000
## min               18.0000000
## max               69.0000000
## range             51.0000000
## sum             5094.0000000
## median            30.0000000
## mean              30.8727273
## SE.mean            0.7868169
## CI.mean.0.95       1.5535972
## var              102.1483370
## std.dev           10.1068460
## coef.var           0.3273713
## 
## [[2]]

What is the gender distribution in our sample?

# gender counts (one row per participant via my_table)
my_table(working_file, gender)
## .
##   male female  other 
##     48    117      1

What’s the height distribution? One participant appears to be giant. I think this is too unrealistic. We could think about whether that participant took the survey seriously, but the ratings and duration on instruction look good when inspecting the raw data. After setting that entry to missing, the rest looks pretty normal.

describe_visualize(
  working_file,
  height_cm,
  FALSE,
  TRUE
)
## [[1]]
## 
## 
##                            x
## -------------  -------------
## nbr.val         1.650000e+02
## nbr.null        0.000000e+00
## nbr.na          1.000000e+00
## min             1.500000e+02
## max             2.900000e+02
## range           1.400000e+02
## sum             2.802400e+04
## median          1.700000e+02
## mean            1.698424e+02
## SE.mean         9.919466e-01
## CI.mean.0.95    1.958633e+00
## var             1.623531e+02
## std.dev         1.274178e+01
## coef.var        7.502120e-02
## 
## [[2]]

# Recode the implausible height (290 cm) to missing; na_if() is the
# idiomatic equivalent of if_else(height_cm == 290, NA_real_, height_cm).
working_file <-
  working_file %>%
  mutate(height_cm = na_if(height_cm, 290))

# Re-check the height distribution after recoding the implausible value.
describe_visualize(
  df = working_file,
  variable = height_cm,
  repeated_measure = FALSE,
  want_summary = TRUE
)
## [[1]]
## 
## 
##                            x
## -------------  -------------
## nbr.val         1.640000e+02
## nbr.null        0.000000e+00
## nbr.na          2.000000e+00
## min             1.500000e+02
## max             1.930000e+02
## range           4.300000e+01
## sum             2.773400e+04
## median          1.700000e+02
## mean            1.691098e+02
## SE.mean         6.727867e-01
## CI.mean.0.95    1.328501e+00
## var             7.423328e+01
## std.dev         8.615874e+00
## coef.var        5.094840e-02
## 
## [[2]]

Let’s look at the weight. There are a couple of participants who reported weighing less than 20 kilos. I find that hard to believe and set their weights to missing.

# Summary statistics and density plot for weight in kg (trait-like measure).
describe_visualize(
  df = working_file,
  variable = weight_kilos,
  repeated_measure = FALSE,
  want_summary = TRUE
)
## [[1]]
## 
## 
##                            x
## -------------  -------------
## nbr.val         1.640000e+02
## nbr.null        0.000000e+00
## nbr.na          2.000000e+00
## min             3.000000e+00
## max             1.510000e+02
## range           1.480000e+02
## sum             1.188020e+04
## median          7.000000e+01
## mean            7.244024e+01
## SE.mean         1.352381e+00
## CI.mean.0.95    2.670445e+00
## var             2.999452e+02
## std.dev         1.731893e+01
## coef.var        2.390788e-01
## 
## [[2]]

# inspect the implausible weights (< 20 kg); slice(1) keeps one row per
# participant (demographics are presumably repeated per trial — hence the
# deduplication; confirm against the raw data structure)
working_file %>% 
  filter(weight_kilos < 20) %>% 
  group_by(pp) %>% 
  slice(1) %>% 
  ungroup() %>% 
  select(pp, contains("weight"))
## # A tibble: 2 x 6
##   pp    weight_choice weight_kilos weight_stones weight_pounds
##   <fct> <fct>                <dbl>         <dbl>         <dbl>
## 1 pp_61 pounds_only              3            NA            NA
## 2 pp_90 pounds_only             18            NA            NA
## # ... with 1 more variable: weight_pounds_only <dbl>
# Keep only plausible weights: anything below 20 kg becomes missing.
# Behaviorally identical to if_else(weight_kilos < 20, NA_real_, weight_kilos).
working_file <-
  working_file %>%
  mutate(
    weight_kilos = if_else(weight_kilos >= 20, weight_kilos, NA_real_)
  )

# Re-check the weight distribution after recoding the implausible values.
describe_visualize(
  df = working_file,
  variable = weight_kilos,
  repeated_measure = FALSE,
  want_summary = TRUE
)
## [[1]]
## 
## 
##                            x
## -------------  -------------
## nbr.val         1.620000e+02
## nbr.null        0.000000e+00
## nbr.na          4.000000e+00
## min             4.300000e+01
## max             1.510000e+02
## range           1.080000e+02
## sum             1.185920e+04
## median          7.000000e+01
## mean            7.320494e+01
## SE.mean         1.253943e+00
## CI.mean.0.95    2.476297e+00
## var             2.547246e+02
## std.dev         1.596009e+01
## coef.var        2.180194e-01
## 
## [[2]]

Next, some information about their diet.

  • participants seem to be divided on whether they’re trying to change their diet
  • frequent meat eaters
  • like meat better than vegan or plant-based foods

There are two participants who indicated being vegetarian, even though they said they were omnivores at the beginning of the study. One of those participants even eats meat once a week. I’ll check later whether excluding them (pp_30 and pp_63) changes anything for the hypothesized analyses.

# diet: frequency table of self-reported diet
my_table(working_file, diet)
## .
##   omnivore vegetarian      other 
##        163          2          1
# check the two alleged vegetarians (they reported being omnivores at
# screening); distinct() collapses the repeated trial rows
working_file %>% 
  filter(diet == "vegetarian") %>% 
  select(pp, diet, meat_per_week) %>% 
  distinct()
## # A tibble: 2 x 3
##   pp    diet       meat_per_week
##   <fct> <fct>              <dbl>
## 1 pp_30 vegetarian             0
## 2 pp_63 vegetarian             1
# how often participants eat meat per week
describe_visualize(
  df = working_file,
  variable = meat_per_week,
  repeated_measure = FALSE,
  want_summary = TRUE
)
## [[1]]
## 
## 
##                            x
## -------------  -------------
## nbr.val          166.0000000
## nbr.null           2.0000000
## nbr.na             0.0000000
## min                0.0000000
## max               21.0000000
## range             21.0000000
## sum             1155.0000000
## median             7.0000000
## mean               6.9578313
## SE.mean            0.2889093
## CI.mean.0.95       0.5704357
## var               13.8557868
## std.dev            3.7223362
## coef.var           0.5349851
## 
## [[2]]

# to what extent participants are currently trying to change their diets
# (0-100 slider, judging by the observed min/max)
describe_visualize(
  df = working_file,
  variable = change_diet,
  repeated_measure = FALSE,
  want_summary = TRUE
)
## [[1]]
## 
## 
##                            x
## -------------  -------------
## nbr.val          166.0000000
## nbr.null           0.0000000
## nbr.na             0.0000000
## min                1.0000000
## max              100.0000000
## range             99.0000000
## sum             7449.8000000
## median            55.4500000
## mean              44.8783133
## SE.mean            2.6610126
## CI.mean.0.95       5.2540246
## var             1175.4439765
## std.dev           34.2847485
## coef.var           0.7639491
## 
## [[2]]

# attitudes toward eating meat
describe_visualize(
  df = working_file,
  variable = attitude_meat,
  repeated_measure = FALSE,
  want_summary = TRUE
)
## [[1]]
## 
## 
##                            x
## -------------  -------------
## nbr.val         1.660000e+02
## nbr.null        0.000000e+00
## nbr.na          0.000000e+00
## min             1.000000e+00
## max             1.000000e+02
## range           9.900000e+01
## sum             1.241726e+04
## median          8.140500e+01
## mean            7.480277e+01
## SE.mean         2.042682e+00
## CI.mean.0.95    4.033165e+00
## var             6.926433e+02
## std.dev         2.631812e+01
## coef.var        3.518335e-01
## 
## [[2]]

# attitudes toward vegan food
describe_visualize(
  df = working_file,
  variable = attitude_vegan,
  repeated_measure = FALSE,
  want_summary = TRUE
)
## [[1]]
## 
## 
##                            x
## -------------  -------------
## nbr.val          166.0000000
## nbr.null           0.0000000
## nbr.na             0.0000000
## min                1.0000000
## max              100.0000000
## range             99.0000000
## sum             9025.8000000
## median            56.5650000
## mean              54.3722892
## SE.mean            2.1154573
## CI.mean.0.95       4.1768554
## var              742.8764941
## std.dev           27.2557608
## coef.var           0.5012804
## 
## [[2]]

# attitude toward plant-based food
describe_visualize(
  df = working_file,
  variable = attitude_pb,
  repeated_measure = FALSE,
  want_summary = TRUE
)
## [[1]]
## 
## 
##                            x
## -------------  -------------
## nbr.val          166.0000000
## nbr.null           0.0000000
## nbr.na             0.0000000
## min                1.0000000
## max              100.0000000
## range             99.0000000
## sum             9886.2600000
## median            62.3500000
## mean              59.5557831
## SE.mean            1.9879785
## CI.mean.0.95       3.9251555
## var              656.0417288
## std.dev           25.6133116
## coef.var           0.4300726
## 
## [[2]]

# whether participants have food allergies (frequency table)
my_table(working_file, allergies)
## .
## yes  no 
##   8 158

3.3 Ratings

Next, I visualize the ratings on attractiveness and simulations, respectively. I’ll first visualize and describe them overall, then per factor level (as in two main effects of label type and food type), and then the interaction.

3.3.1 Attractiveness

First, I inspect the overall attractiveness ratings, regardless of label or food type.

# Overall attractiveness ratings across all labels and food types;
# repeated_measure = TRUE because each participant rated many items.
describe_visualize(
  df = working_file %>%
    filter(measure == "attractiveness"),
  variable = rating,
  repeated_measure = TRUE,
  want_summary = TRUE
)
## [[1]]
## 
## 
##                            x
## -------------  -------------
## nbr.val         6.640000e+03
## nbr.null        2.740000e+02
## nbr.na          0.000000e+00
## min             0.000000e+00
## max             1.000000e+02
## range           1.000000e+02
## sum             3.636170e+05
## median          6.070000e+01
## mean            5.476159e+01
## SE.mean         3.602866e-01
## CI.mean.0.95    7.062776e-01
## var             8.619149e+02
## std.dev         2.935839e+01
## coef.var        5.361127e-01
## 
## [[2]]

Next, a raincloud plot with ratings per label type. Doesn’t look like there’s much of a difference.

# raincloud plot: attractiveness ratings split by label type
rc_plot(
  working_file,
  "attractiveness",
  "rating",
  "label_type",
  "Label Type",
  "Raincloud Plot of Food Attractiveness Ratings"
  )

# raw means and SDs (without taking grouping by PP into account),
# i.e. treating every trial as an independent observation
working_file %>% 
  filter(measure == "attractiveness") %>% 
  group_by(label_type) %>% 
  summarize(
    mean = mean(rating, na.rm = TRUE),
    sd = sd(rating, na.rm = TRUE)
  )
## # A tibble: 2 x 3
##   label_type  mean    sd
##   <fct>      <dbl> <dbl>
## 1 control     53.1  29.4
## 2 enhanced    56.4  29.3
# means (unchanged) and SDs after aggregating per participant
# (to make the SDs comparable to a repeated-measures ANOVA):
# first a mean per pp x label_type cell, then the grand mean/SD of those
working_file %>% 
  filter(measure == "attractiveness") %>% 
  group_by(pp, label_type) %>%
  summarize(mean_agg = mean(rating, na.rm = TRUE)) %>% 
  ungroup() %>% 
  group_by(label_type) %>% 
  summarize(
    mean = mean(mean_agg),
    sd = sd(mean_agg)
  )
## # A tibble: 2 x 3
##   label_type  mean    sd
##   <fct>      <dbl> <dbl>
## 1 control     53.1  12.4
## 2 enhanced    56.4  13.4

Let’s inspect the ratings per food type. People seem to like meat-based labels more.

# raincloud plot: attractiveness ratings split by food type
rc_plot(
  working_file,
  "attractiveness",
  "rating",
  "food_type",
  "Food Type",
  "Raincloud Plot of Food Attractiveness Ratings"
  )

# raw means and SDs of attractiveness per food type
working_file %>% 
  filter(measure == "attractiveness") %>% 
  group_by(food_type) %>% 
  summarize(
    mean = mean(rating, na.rm = TRUE),
    sd = sd(rating, na.rm = TRUE)
  )
## # A tibble: 2 x 3
##   food_type    mean    sd
##   <fct>       <dbl> <dbl>
## 1 meat_based   61.3  28.0
## 2 plant_based  48.2  29.2

Next, I inspect the interaction of label type and food type for attractiveness ratings. The raincloud plots are not that easy to read (even if I add means with CIs), so for easier visualization I use a line plot on the aggregated means with within-subject standard error (from the Rmisc::summarySEwithin function).

# raincloud plot: label type x food type interaction for attractiveness
rc_plot(
  working_file,
  "attractiveness",
  "rating",
  "label_type",
  "Label Type",
  "Raincloud Plot of Food Attractiveness Ratings",
  "food_type"
  )

# get summary statistics with within-subject standard errors
# (Rmisc::summarySEwithin); ratings are first aggregated to one mean per
# pp x label_type x food_type cell
summary_attr <-
  summarySEwithin(
    data = working_file %>%
      filter(measure == "attractiveness") %>%
      group_by(pp, label_type, food_type) %>%
      summarize(rating = mean(rating, na.rm = TRUE)),
    measurevar = "rating",
    withinvars = c("label_type", "food_type"),
    idvar = "pp"
  )

# have a look at means and SDs
summary_attr
##   label_type   food_type   N   rating       sd        se       ci
## 1    control  meat_based 166 59.78544 11.93978 0.9267069 1.829732
## 2    control plant_based 166 46.40062 11.97610 0.9295257 1.835298
## 3   enhanced  meat_based 166 62.87654 11.57998 0.8987805 1.774593
## 4   enhanced plant_based 166 49.98378 10.98582 0.8526650 1.683541
# line graph of the aggregated interaction means
# NOTE(review): `rating` is passed unquoted while the other arguments are
# strings — confirm that line_plot() expects this mixed style
line_plot(
  summary_attr,
  "label_type",
  rating,
  "food_type"
)

3.3.2 Simulations

First, I inspect the overall simulations ratings, regardless of label or food type.

# Overall simulations ratings across all labels and food types;
# repeated_measure = TRUE because each participant rated many items.
describe_visualize(
  df = working_file %>%
    filter(measure == "simulations"),
  variable = rating,
  repeated_measure = TRUE,
  want_summary = TRUE
)
## [[1]]
## 
## 
##                            x
## -------------  -------------
## nbr.val         6.638000e+03
## nbr.null        1.070000e+02
## nbr.na          2.000000e+00
## min             0.000000e+00
## max             1.000000e+02
## range           1.000000e+02
## sum             3.915712e+05
## median          6.394000e+01
## mean            5.898934e+01
## SE.mean         3.410187e-01
## CI.mean.0.95    6.685062e-01
## var             7.719578e+02
## std.dev         2.778413e+01
## coef.var        4.710026e-01
## 
## [[2]]

Next, a raincloud plot with simulations ratings per label type. Doesn’t look like there’s much of a difference.

# raincloud plot: simulations ratings split by label type
rc_plot(
  working_file,
  "simulations",
  "rating",
  "label_type",
  "Label Type",
  "Raincloud Plot of Food Simulations Ratings"
  )

# raw means and SDs (treating every trial as an independent observation)
working_file %>% 
  filter(measure == "simulations") %>% 
  group_by(label_type) %>% 
  summarize(
    mean = mean(rating, na.rm = TRUE),
    sd = sd(rating, na.rm = TRUE)
  )
## # A tibble: 2 x 3
##   label_type  mean    sd
##   <fct>      <dbl> <dbl>
## 1 control     55.5  28.4
## 2 enhanced    62.5  26.7
# means (unchanged) and SDs after aggregating per participant
# (to make the SDs comparable to a repeated-measures ANOVA)
working_file %>% 
  filter(measure == "simulations") %>% 
  group_by(pp, label_type) %>%
  summarize(mean_agg = mean(rating, na.rm = TRUE)) %>% 
  ungroup() %>% 
  group_by(label_type) %>% 
  summarize(
    mean = mean(mean_agg),
    sd = sd(mean_agg)
  )
## # A tibble: 2 x 3
##   label_type  mean    sd
##   <fct>      <dbl> <dbl>
## 1 control     55.5  15.1
## 2 enhanced    62.5  12.9

Let’s inspect the ratings per food type. People seem to simulate more with meat-based labels.

# raincloud plot: simulations ratings split by food type
rc_plot(
  working_file,
  "simulations",
  "rating",
  "food_type",
  "Food Type",
  "Raincloud Plot of Food Simulations Ratings"
  )

# raw means and SDs of simulations per food type
working_file %>% 
  filter(measure == "simulations") %>% 
  group_by(food_type) %>% 
  summarize(
    mean = mean(rating, na.rm = TRUE),
    sd = sd(rating, na.rm = TRUE)
  )
## # A tibble: 2 x 3
##   food_type    mean    sd
##   <fct>       <dbl> <dbl>
## 1 meat_based   64.9  26.0
## 2 plant_based  53.1  28.2

Next, I inspect the interaction of label type and food type for simulations ratings with rainclouds and line graphs.

# raincloud plot: label type x food type interaction for simulations
# FIX: the plot title previously read "Attractiveness" — a copy-paste
# leftover from the attractiveness section (cf. the parallel call in the
# simulations-by-label-type plot, which correctly says "Simulations")
rc_plot(
  working_file,
  "simulations",
  "rating",
  "label_type",
  "Label Type",
  "Raincloud Plot of Food Simulations Ratings",
  "food_type"
  )

# get summary statistics with within-subject standard errors
# (Rmisc::summarySEwithin), after aggregating to one mean per
# pp x label_type x food_type cell
summary_sim <-
  summarySEwithin(
    data = working_file %>%
      filter(measure == "simulations") %>%
      group_by(pp, label_type, food_type) %>%
      summarize(rating = mean(rating, na.rm = TRUE)),
    measurevar = "rating",
    withinvars = c("label_type", "food_type"),
    idvar = "pp"
  )

# have a look at Ms and SDs
summary_sim
##   label_type   food_type   N   rating       sd        se       ci
## 1    control  meat_based 166 61.85078 11.84391 0.9192660 1.815041
## 2    control plant_based 166 49.09478 11.15353 0.8656821 1.709242
## 3   enhanced  meat_based 166 67.95105 11.06950 0.8591596 1.696364
## 4   enhanced plant_based 166 57.05902 12.05469 0.9356255 1.847342
# line graph of the aggregated interaction means
line_plot(
  summary_sim,
  "label_type",
  rating,
  "food_type"
)

4. Analysis

I now move to the actual analysis. I will structure the analysis section per hypothesis, and each hypothesis with a confirmatory/preregistered and an exploratory sub-section. I will note and explain deviations from the preregistrations wherever applicable. After the hypothesis sections, I’ll also describe exploratory results.

4.1 H1: Main effect of food label

The preregistered hypothesis reads as follows:

Simulation and attractiveness will be higher for foods with enhanced compared to neutral labels.

4.1.1 Confirmatory

I’ll start with the confirmatory model. Note that the preregistration was imprecise in saying that we would predict the outcome with food type and label type. What it actually means is controlling for food item as a random effect. Thus, we specify a model with a maximal random-effects structure.

Before we can do that, we need to add the individual food item as a factor to the data set. As of now, we only have the food type plus a stimulus number, which together create 40 unique food items. I’ll add them here; for a full description, see the OSM materials.

# file that contains the foods; per the printed result the three columns
# are food_type (factor), stimulus_no (number), and food (factor)
food_items <-
  read_csv(
    here("data", "study3", "food_items.csv"),
    col_types = list(
      col_factor(levels = NULL),
      col_number(),
      col_factor(levels = NULL)
    )
  )

# quick sanity check of the first rows
head(food_items, n = 5)
## # A tibble: 5 x 3
##   food_type  stimulus_no food           
##   <fct>            <dbl> <fct>          
## 1 meat_based           1 chicken_balti  
## 2 meat_based           2 pepperoni_pizza
## 3 meat_based           3 empanadas      
## 4 meat_based           4 steak_pie      
## 5 meat_based           5 pasta_bake
# Attach the individual food name to every trial row. The %>% pipe already
# supplies working_file as the first argument, so the explicit `.`
# placeholder is unnecessary.
working_file <-
  working_file %>%
  left_join(
    food_items,
    by = c("food_type", "stimulus_no")
  ) %>%
  select( # reorder columns: food name right after the stimulus number
    pp:stimulus_no,
    food,
    everything()
  )

# also add the food names to the file with participants who had a high RSI
# (the `.` placeholder is redundant with %>%, so it is omitted)
high_rsi <-
  high_rsi %>%
  left_join(
    food_items,
    by = c("food_type", "stimulus_no")
  ) %>%
  select( # reorder columns: food name right after the stimulus number
    pp:stimulus_no,
    food,
    everything()
  )

For the supplementary online materials, we also want to provide a table that lists all food descriptions with average attractiveness ratings for each label type. For this, I get the means per food by label type.

# per-food mean attractiveness for each label type, reshaped to wide
# (one row per food, one column per label type) for the supplementary
# materials table; note the result stays grouped by food, as summarize()
# only drops the last grouping level
means_per_food <-
  working_file %>% 
  filter(measure == "attractiveness") %>% 
  group_by(food, label_type) %>% 
  summarize(
    food_mean = mean(rating, na.rm = TRUE)
  ) %>% 
  mutate(
    food_mean = round(food_mean, digits = 2)
  ) %>%  # turn into wide format
  pivot_wider(
    id_cols = food,
    names_from = label_type,
    values_from = food_mean
  )

head(means_per_food)
## # A tibble: 6 x 3
## # Groups:   food [40]
##   food            control enhanced
##   <fct>             <dbl>    <dbl>
## 1 chicken_balti      60.9     62.3
## 2 pepperoni_pizza    67.6     72.4
## 3 empanadas          58.8     65.2
## 4 steak_pie          56.0     67.0
## 5 pasta_bake         69.1     70.6
## 6 risottoo           59.4     57.5
# write file as table; comma-delimited despite the .txt extension
write_delim(
  means_per_food,
  here("data", "study3", "means_per_food.txt"),
  delim = ","
)

Also, because there’s two dependent variables in the data, I’ll split the working file into two analysis files, one per measurement.

# One analysis file per dependent measure; droplevels() removes the
# now-unused level of the measure factor in each subset.
simulations <- working_file %>%
  filter(measure == "simulations") %>%
  mutate(measure = droplevels(measure))

attractiveness <- working_file %>%
  filter(measure == "attractiveness") %>%
  mutate(measure = droplevels(measure))

4.1.1.1 Simulations

Construct the main effects model: predict simulations by label type. Because we’re only interested in the main effects, I’ll use dummy coding for the predictor; we only have two levels and Type 2 or 3 Sums of Squares will be the same. The model converges without problems.

# set contrasts: dummy (treatment) coding for factors; with only two levels
# Type 2 and Type 3 sums of squares coincide
options(contrasts = c("contr.treatment", "contr.poly"))

# construct model: simulations predicted by label type, with random
# intercepts and label-type slopes for both participants and food items
# (the maximal structure for this design)
h1_sim <- lme4::lmer(
  rating ~
    label_type +
    (1 + label_type | pp) +
    (1 + label_type | food),
  simulations
)

# inspect model
summary(h1_sim)
## Linear mixed model fit by REML ['lmerMod']
## Formula: rating ~ label_type + (1 + label_type | pp) + (1 + label_type |  
##     food)
##    Data: simulations
## 
## REML criterion at convergence: 61175.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.4229 -0.6460  0.1250  0.6867  2.6661 
## 
## Random effects:
##  Groups   Name               Variance Std.Dev. Corr 
##  pp       (Intercept)        199.15   14.112        
##           label_typeenhanced 102.04   10.101   -0.56
##  food     (Intercept)         70.54    8.399        
##           label_typeenhanced  15.20    3.899   -0.42
##  Residual                    528.00   22.978        
## Number of obs: 6638, groups:  pp, 166; food, 40
## 
## Fixed effects:
##                    Estimate Std. Error t value
## (Intercept)          55.480      1.767  31.398
## label_typeenhanced    7.018      1.146   6.124
## 
## Correlation of Fixed Effects:
##             (Intr)
## lbl_typnhnc -0.486

Now let’s inspect model diagnostics. The residuals look normally distributed; the proportion of large residuals is fine; the Q-Q-plot also looks fine.

However, there are clear cut-offs in the predicted vs. residuals plot. That’s because the outcome variable is bounded to [0, 100]. For example, a fitted value of 50 can be off by a maximum of -50 or 50, not lower or higher. Usually, this pattern happens with binomial distributions. However, I don’t think using a binomial model would be appropriate here: a) the residuals are normally distributed; b) I would argue that a 0–100 rating slider is conceptually rather different from a proportion (I wouldn’t know how to count successes and failures for a binomial distribution). Therefore, I will maintain the linear model, because I believe it makes sense conceptually and displays decent fit.

Moreover, two participants stand out as formal outliers: pp_91 and pp_48. For foods, there’s one stimulus with a large influence: vegan_fillets. I’ll check whether the model is robust to their exclusion.

# inspect standardized residuals (should look roughly normal)
densityplot(resid(h1_sim, scaled = TRUE))

# q-q plot of the standardized residuals
car::qqPlot(resid(h1_sim, scaled = TRUE))

## [1] 3469 3542
# proportion of residuals in the extremes of the distribution,
# compared against the proportions expected under normality
lmer_residuals(h1_sim)
## # A tibble: 3 x 4
##   standardized_cutoff proportion_cutoff proportion below_cutoff
##                 <dbl>             <dbl>      <dbl> <lgl>       
## 1                 2              0.05     0.0375   TRUE        
## 2                 2.5            0.01     0.0104   FALSE       
## 3                 3              0.0001   0.000904 FALSE
# obtain influence measures per participant and per food item
# (influence.ME refits the model leaving out one unit at a time, so this
# can be slow)
h1_sim_outliers_pp <- influence.ME::influence(h1_sim, c("pp"))
h1_sim_outliers_food <- influence.ME::influence(h1_sim, c("food"))

# plot formal outliers for pp: Cook's distance, then dfbetas per coefficient
plot(h1_sim_outliers_pp, which = 'cook')

plot(h1_sim_outliers_pp, which = 'dfbetas')[1]

plot(h1_sim_outliers_pp, which = 'dfbetas')[2]

# which participants have the highest Cook's distance
arrange_cook(h1_sim_outliers_pp, "pp")
## # A tibble: 6 x 2
##   pp     cooks_distance
##   <chr>           <dbl>
## 1 pp_91          0.0392
## 2 pp_48          0.0262
## 3 pp_103         0.0179
## 4 pp_109         0.0168
## 5 pp_143         0.0142
## 6 pp_120         0.0137
# plot formal outliers for food: Cook's distance, then dfbetas per coefficient
plot(h1_sim_outliers_food, which = 'cook')

plot(h1_sim_outliers_food, which = 'dfbetas')[1]

plot(h1_sim_outliers_food, which = 'dfbetas')[2]

# which food items are farthest out on Cook's distance?
arrange_cook(h1_sim_outliers_food, "food")
## # A tibble: 6 x 2
##   food             cooks_distance
##   <chr>                     <dbl>
## 1 vegan_fillets            0.0825
## 2 veggie_burger            0.0447
## 3 mushroom_burrito         0.0377
## 4 pasta_bake               0.0362
## 5 fishless_cakes           0.0314
## 6 mint_soup                0.0305
# plot the fitted vs. the residuals (to check for homo/heteroskedasticity) with added smoothed line.
plot(h1_sim, type = c('p', 'smooth'))

The effect is highly significant.

# get p-value via afex::mixed: Type 3 tests with the Satterthwaite
# approximation for degrees of freedom; check_contrasts = FALSE keeps the
# treatment contrasts set above
h1_sim_p <-
  mixed(
    rating ~
    label_type +
    (1 + label_type | pp) +
    (1 + label_type | food),
    simulations,
    type = 3,
    method = "S",
    check_contrasts = FALSE
  )
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# what is it
anova(h1_sim_p)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating ~ label_type + (1 + label_type | pp) + (1 + label_type | 
## Model:     food)
## Data: simulations
##            num Df den Df      F    Pr(>F)    
## label_type      1 95.131 37.509 2.037e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# approximate effect size: marginal R2 (fixed effects only) and
# conditional R2 (fixed + random effects) from MuMIn
r.squaredGLMM(h1_sim)
##             R2m       R2c
## [1,] 0.01589684 0.3184168

4.1.1.2 Attractiveness

Next, I run the same model, but with attractiveness as outcome.

# construct model: same maximal random-effects structure as for
# simulations, but with attractiveness ratings as the outcome
h1_attr <- lme4::lmer(
  rating ~
    label_type +
    (1 + label_type | pp) +
    (1 + label_type | food),
  attractiveness
)

# inspect model
summary(h1_attr)
## Linear mixed model fit by REML ['lmerMod']
## Formula: rating ~ label_type + (1 + label_type | pp) + (1 + label_type |  
##     food)
##    Data: attractiveness
## 
## REML criterion at convergence: 62303
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.9459 -0.7134  0.1018  0.7232  2.6304 
## 
## Random effects:
##  Groups   Name               Variance Std.Dev. Corr 
##  pp       (Intercept)        121.51   11.023        
##           label_typeenhanced  23.35    4.832   0.03 
##  food     (Intercept)         98.44    9.922        
##           label_typeenhanced  20.23    4.498   -0.45
##  Residual                    639.31   25.285        
## Number of obs: 6640, groups:  pp, 166; food, 40
## 
## Fixed effects:
##                    Estimate Std. Error t value
## (Intercept)          53.098      1.840  28.858
## label_typeenhanced    3.329      1.016   3.278
## 
## Correlation of Fixed Effects:
##             (Intr)
## lbl_typnhnc -0.367

Let’s inspect model diagnostics. All of them look good, and there’s only one clear outlier: veggie_burger.

# inspect standardized residuals (should look roughly normal)
densityplot(resid(h1_attr, scaled = TRUE))

# q-q plot of the standardized residuals
car::qqPlot(resid(h1_attr, scaled = TRUE))

## [1] 2797 2799
# proportion of residuals in the extremes of the distribution
lmer_residuals(h1_attr)
## # A tibble: 3 x 4
##   standardized_cutoff proportion_cutoff proportion below_cutoff
##                 <dbl>             <dbl>      <dbl> <lgl>       
## 1                 2              0.05      0.0330  TRUE        
## 2                 2.5            0.01      0.00542 TRUE        
## 3                 3              0.0001    0       TRUE
# obtain influence measures per participant and per food item
# (leave-one-unit-out refits via influence.ME; can be slow)
h1_attr_outliers_pp <- influence.ME::influence(h1_attr, c("pp"))
h1_attr_outliers_food <- influence.ME::influence(h1_attr, c("food"))

# plot formal outliers for pp: Cook's distance, then dfbetas per coefficient
plot(h1_attr_outliers_pp, which = 'cook')

plot(h1_attr_outliers_pp, which = 'dfbetas')[1]

plot(h1_attr_outliers_pp, which = 'dfbetas')[2]

# which participants have the highest Cook's distance
arrange_cook(h1_attr_outliers_pp, "pp")
## # A tibble: 6 x 2
##   pp     cooks_distance
##   <chr>           <dbl>
## 1 pp_59          0.0168
## 2 pp_181         0.0164
## 3 pp_115         0.0139
## 4 pp_18          0.0132
## 5 pp_41          0.0124
## 6 pp_126         0.0114
# plot formal outliers for food: Cook's distance, then dfbetas per coefficient
plot(h1_attr_outliers_food, which = 'cook')

plot(h1_attr_outliers_food, which = 'dfbetas')[1]

plot(h1_attr_outliers_food, which = 'dfbetas')[2]

# which food items are farthest out on Cook's distance?
arrange_cook(h1_attr_outliers_food, "food")
## # A tibble: 6 x 2
##   food            cooks_distance
##   <chr>                    <dbl>
## 1 veggie_burger           0.212 
## 2 noddles_veggies         0.0744
## 3 steam_bags              0.0485
## 4 spring_rolls            0.0390
## 5 vegan_fillets           0.0384
## 6 fishless_cakes          0.0373
# plot the fitted vs. the residuals (to check for homo/heteroskedasticity) with added smoothed line.
plot(h1_attr, type = c('p', 'smooth'))

Although the difference is quite small in absolute terms, it’s still significant.

# get p-value via afex::mixed (Type 3, Satterthwaite dfs; keep the
# treatment contrasts set earlier)
h1_attr_p <-
  mixed(
    rating ~
    label_type +
    (1 + label_type | pp) +
    (1 + label_type | food),
    attractiveness,
    type = 3,
    method = "S",
    check_contrasts = FALSE
  )
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# what is it
anova(h1_attr_p)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating ~ label_type + (1 + label_type | pp) + (1 + label_type | 
## Model:     food)
## Data: attractiveness
##            num Df den Df      F  Pr(>F)   
## label_type      1  47.57 10.742 0.00196 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# approximate effect size: marginal and conditional R2 from MuMIn
r.squaredGLMM(h1_attr)
##              R2m      R2c
## [1,] 0.003202627 0.261087

4.1.2 Exploratory

Here, I conduct several checks to see whether the effects are robust to control variables and outlier exclusion.

I start with including the participants with a high RSI that we excluded above. Their exclusion was not preregistered.

The simulation model did not converge. I tried all current best practices:

  • increased number of iterations
  • start from previous fit
  • different optimizers
  • different contrasts

In the end, only simplifying the model worked (i.e., removing the random intercept for food). Under this condition, the effect remained significant. For attractiveness, the model converges and proves robust to the inclusion of those cases.

# get p-value for simulations, re-including the high-RSI participants;
# note the simplified random-effects structure (0 + label_type | food):
# the full model with a food intercept did not converge (see text above)
h1_sim_p_rsi <-
  mixed(
    rating ~
    label_type +
    (1 + label_type | pp) +
    (0 + label_type | food),
    data = full_join(
      simulations,
      high_rsi %>% 
        filter(measure == "simulations")
    ),
    type = 3,
    method = "S",
    check_contrasts = FALSE
  )
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
anova(h1_sim_p_rsi)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating ~ label_type + (1 + label_type | pp) + (0 + label_type | 
## Model:     food)
## Data: full_join
## Data: simulations
## Data: high_rsi %>% filter(measure == "simulations")
##            num Df den Df      F    Pr(>F)    
## label_type      1 100.65 34.901 4.744e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# get p-value for attractiveness, re-including the high-RSI participants;
# here the full (maximal) model converges, so no simplification is needed
h1_attr_p_rsi <-
  mixed(
    rating ~
    label_type +
    (1 + label_type | pp) +
    (1 + label_type | food),
    data = full_join( # add high rsi cases
      attractiveness,
      high_rsi %>% 
        filter(measure == "attractiveness")
    ),
    type = 3,
    method = "S",
    check_contrasts = FALSE
  )
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
anova(h1_attr_p_rsi)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating ~ label_type + (1 + label_type | pp) + (1 + label_type | 
## Model:     food)
## Data: full_join
## Data: attractiveness
## Data: high_rsi %>% filter(measure == "attractiveness")
##            num Df den Df      F   Pr(>F)   
## label_type      1  47.86 10.742 0.001954 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

As a next step, I see whether the results are robust to the exclusion of the outliers I identified above. I start with the outliers on simulations: the effect remains significant.

# get p-value after excluding the influential units identified in the
# diagnostics above (participants pp_91, pp_48; food vegan_fillets)
h1_sim_no_outliers <-
  mixed(
    rating ~
    label_type +
    (1 + label_type | pp) +
    (1 + label_type | food),
    simulations %>% 
      filter(
        !pp %in% c("pp_91", "pp_48"),
        food != "vegan_fillets"
      ),
    type = 3,
    method = "S",
    check_contrasts = FALSE
  )
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# what is it
anova(h1_sim_no_outliers)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating ~ label_type + (1 + label_type | pp) + (1 + label_type | 
## Model:     food)
## Data: %>%
## Data: simulations
## Data: filter(!pp %in% c("pp_91", "pp_48"), food != "vegan_fillets")
##            num Df den Df     F    Pr(>F)    
## label_type      1 80.682 37.94 2.701e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Next, I remove the outliers on attractiveness: the effect remains robust.

# get p-value after excluding the one influential food item identified in
# the attractiveness diagnostics (veggie_burger)
h1_attr_no_outliers <-
  mixed(
    rating ~
    label_type +
    (1 + label_type | pp) +
    (1 + label_type | food),
    attractiveness %>% 
      filter(food != "veggie_burger"),
    type = 3,
    method = "S",
    check_contrasts = FALSE
  )
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# what is it
anova(h1_attr_no_outliers)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating ~ label_type + (1 + label_type | pp) + (1 + label_type | 
## Model:     food)
## Data: %>%
## Data: attractiveness
## Data: filter(food != "veggie_burger")
##            num Df den Df      F   Pr(>F)   
## label_type      1 46.671 10.201 0.002515 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Next, I check whether excluding the two self-declared vegetarians changes the results. The results are robust.

# Robustness check: refit the H1 simulations model after excluding the
# two self-declared vegetarians (pp_30, pp_63).
# simulations
h1_sim_no_vegetarians <-
  mixed(
    rating ~
    label_type +
    (1 + label_type | pp) +
    (1 + label_type | food),
    simulations %>% 
      filter(!pp %in% c("pp_30", "pp_63")),
    type = 3,
    method = "S",
    check_contrasts = FALSE
  )
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# what is it
anova(h1_sim_no_vegetarians)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating ~ label_type + (1 + label_type | pp) + (1 + label_type | 
## Model:     food)
## Data: %>%
## Data: simulations
## Data: filter(!pp %in% c("pp_30", "pp_63"))
##            num Df den Df      F    Pr(>F)    
## label_type      1 96.318 37.522 1.966e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Same vegetarian-exclusion check for the attractiveness outcome.
# attractiveness
h1_attr_no_vegetarians <-
  mixed(
    rating ~
    label_type +
    (1 + label_type | pp) +
    (1 + label_type | food),
    attractiveness %>% 
      filter(!pp %in% c("pp_30", "pp_63")),
    type = 3,
    method = "S",
    check_contrasts = FALSE
  )
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# what is it
anova(h1_attr_no_vegetarians)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating ~ label_type + (1 + label_type | pp) + (1 + label_type | 
## Model:     food)
## Data: %>%
## Data: attractiveness
## Data: filter(!pp %in% c("pp_30", "pp_63"))
##            num Df den Df      F   Pr(>F)   
## label_type      1 47.787 10.105 0.002594 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

As a further robustness check, I include two control variables, namely the attitude toward vegan/plant-based foods and the frequency of eating meat. For the former, we preregistered to take the mean of those two attitude variables.

# Preregistered covariate: the mean of the two attitude items
# (attitude toward plant-based food, attitude toward vegan food).
# FIX: the simulations branch used c(attitude_pb + attitude_vegan) / 2
# while the attractiveness branch used plain parentheses. c() of a single
# numeric vector is numerically equivalent, but the parentheses state the
# intent (arithmetic grouping) and keep both pipelines consistent.
simulations <-
  simulations %>% 
  mutate(
    attitude_pb_vegan = (attitude_pb + attitude_vegan) / 2
  )

attractiveness <-
  attractiveness %>% 
  mutate(
    attitude_pb_vegan = (attitude_pb + attitude_vegan) / 2
  )

Now, we include this average attitude and the frequency of eating meat as covariates. Because they are continuous, I’ll grand-mean center them to make it easier to interpret their estimate.

Apparently, attitudes toward plant-based/vegan food has a positive association with simulations, whereas frequency of eating meat has a negative association, regardless of label type. However, none of these associations are significant. The effect of label type is robust to the inclusion of these covariates.

In contrast, attitude has a positive, significant association with attractiveness. The effect of label type remains significant.

# Grand-mean center the two covariates so their model estimates are
# interpretable at the sample-average attitude / meat frequency.
# across() supersedes mutate_at(); the named function list together with
# the default .names = "{.col}_{.fn}" reproduces the original "_c"
# suffix (e.g. attitude_pb_vegan_c) and leaves the raw columns untouched.
# NOTE: scale() returns a one-column matrix; kept as-is, matching the
# original behavior (lmer accepts matrix columns).
simulations <-
  simulations %>% 
  mutate(
    across(
      c(attitude_pb_vegan, meat_per_week),
      list(c = ~ scale(., center = TRUE, scale = FALSE))
    )
  )

attractiveness <-
  attractiveness %>% 
  mutate(
    across(
      c(attitude_pb_vegan, meat_per_week),
      list(c = ~ scale(., center = TRUE, scale = FALSE))
    )
  )

# get descriptives for the averaged attitude covariate.
# Named arguments replace the two bare positional logicals so the call is
# self-documenting: this is a trait-like (non-repeated) measure, and we
# want the summary table in addition to the density plot.
describe_visualize(attractiveness, 
                   attitude_pb_vegan,
                   repeated_measure = FALSE,
                   want_summary = TRUE)
## [[1]]
## 
## 
##                            x
## -------------  -------------
## nbr.val          166.0000000
## nbr.null           0.0000000
## nbr.na             0.0000000
## min                1.0000000
## max              100.0000000
## range             99.0000000
## sum             9456.0300000
## median            57.6450000
## mean              56.9640361
## SE.mean            1.9082645
## CI.mean.0.95       3.7677646
## var              604.4846136
## std.dev           24.5862688
## coef.var           0.4316104
## 
## [[2]]

# H1 (simulations) with the grand-mean-centered covariates added as
# fixed effects; the random-effects structure is unchanged from the
# main model.
# fit model
h1_sim_covariates_p <-
  mixed(
    rating ~
    label_type +
    attitude_pb_vegan_c +
    meat_per_week_c +
    (1 + label_type | pp) +
    (1 + label_type | food),
    simulations,
    type = 3,
    method = "S",
    check_contrasts = FALSE
  )
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# what is it
anova(h1_sim_covariates_p)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating ~ label_type + attitude_pb_vegan_c + meat_per_week_c + 
## Model:     (1 + label_type | pp) + (1 + label_type | food)
## Data: simulations
##                     num Df  den Df       F    Pr(>F)    
## label_type               1  95.036 37.4975 2.051e-08 ***
## attitude_pb_vegan_c      1 162.851  3.4540   0.06491 .  
## meat_per_week_c          1 162.853  0.1353   0.71343    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Same covariate-adjusted model for the attractiveness outcome.
# fit model
h1_attr_covariates_p <-
  mixed(
    rating ~
    label_type +
    attitude_pb_vegan_c +
    meat_per_week_c +
    (1 + label_type | pp) +
    (1 + label_type | food),
    attractiveness,
    type = 3,
    method = "S",
    check_contrasts = FALSE
  )
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# what is it
anova(h1_attr_covariates_p)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating ~ label_type + attitude_pb_vegan_c + meat_per_week_c + 
## Model:     (1 + label_type | pp) + (1 + label_type | food)
## Data: attractiveness
##                     num Df den Df       F    Pr(>F)    
## label_type               1  47.58 10.7438 0.0019589 ** 
## attitude_pb_vegan_c      1 162.97 14.8507 0.0001671 ***
## meat_per_week_c          1 162.97  0.4095 0.5231183    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

As a last robustness check, I see whether gender influences the effect of label type on the two outcomes. It does not.

# drop the one "other" entry from gender (it would lead to convergence issues)
# across() supersedes mutate_at(); passing droplevels directly (instead of
# list(~ droplevels(.))) is equivalent and removes the now-empty factor
# levels of pp and gender after filtering, with column names unchanged.
no_other_sim <-
  simulations %>% 
  filter(gender != "other") %>% 
  mutate(across(c(pp, gender), droplevels))

no_other_attr <-
  attractiveness %>% 
  filter(gender != "other") %>% 
  mutate(across(c(pp, gender), droplevels))

# set contrasts
options(contrasts = c("contr.treatment", "contr.poly"))

# construct model for simulations
# gender is between-participants, so only label_type varies by pp;
# the full label_type * gender part is allowed to vary by food.
# Uses the treatment contrasts set via options() just above.
h1_sim_gender <- mixed(
  rating ~
    label_type * gender + 
    (1 + label_type | pp) +
    (1 + label_type * gender | food),
  no_other_sim,
  type = 3,
  method = "S"
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# inspect model
summary(h1_sim_gender)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## rating ~ label_type * gender + (1 + label_type | pp) + (1 + label_type *  
##     gender | food)
##    Data: data
## 
## REML criterion at convergence: 60787.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.4228 -0.6345  0.1247  0.6855  2.6710 
## 
## Random effects:
##  Groups   Name                Variance Std.Dev. Corr             
##  pp       (Intercept)         145.8529 12.0770                   
##           label_type1          25.9393  5.0931  0.24             
##  food     (Intercept)          55.2114  7.4304                   
##           label_type1           2.9914  1.7296   0.18            
##           gender1               1.6910  1.3004  -0.79 -0.39      
##           label_type1:gender1   0.1753  0.4187  -0.27 -0.91  0.21
##  Residual                     525.8029 22.9304                   
## Number of obs: 6598, groups:  pp, 165; food, 40
## 
## Fixed effects:
##                     Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)          59.1209     1.5963 100.7655  37.036  < 2e-16 ***
## label_type1          -3.3244     0.6016 109.3828  -5.526 2.25e-07 ***
## gender1               0.1786     1.1001 165.2168   0.162    0.871    
## label_type1:gender1   0.4516     0.5400 155.8809   0.836    0.404    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) lbl_t1 gendr1
## label_type1 0.172               
## gender1     0.169  0.036        
## lbl_typ1:g1 0.029  0.319  0.188 
## convergence code: 0
## boundary (singular) fit: see ?isSingular
# construct model for attractiveness
# same specification as the simulations gender model; the bobyqa
# optimizer is requested here explicitly to aid convergence.
h1_attr_gender <- mixed(
  rating ~
    label_type * gender + 
    (1 + label_type | pp) +
    (1 + label_type * gender | food),
  no_other_attr,
  type = 3,
  method = "S",
  control = lmerControl(optimizer = "bobyqa")
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# inspect model
summary(h1_attr_gender)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## rating ~ label_type * gender + (1 + label_type | pp) + (1 + label_type *  
##     gender | food)
##    Data: data
## Control: lmerControl(optimizer = "bobyqa")
## 
## REML criterion at convergence: 61924.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.9621 -0.7077  0.1075  0.7241  2.6369 
## 
## Random effects:
##  Groups   Name                Variance Std.Dev. Corr             
##  pp       (Intercept)         129.7271 11.3898                   
##           label_type1           5.8012  2.4086  -0.24            
##  food     (Intercept)          89.6218  9.4669                   
##           label_type1           6.3653  2.5230   0.22            
##           gender1               3.2596  1.8054   0.39 -0.42      
##           label_type1:gender1   0.3451  0.5875   0.41  0.98 -0.36
##  Residual                     637.2157 25.2431                   
## Number of obs: 6600, groups:  pp, 165; food, 40
## 
## Fixed effects:
##                     Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)          54.5929     1.8195  73.2750  30.005  < 2e-16 ***
## label_type1          -1.8722     0.5647  47.8587  -3.316  0.00175 ** 
## gender1              -0.2234     1.0731 164.7079  -0.208  0.83530    
## label_type1:gender1  -0.5111     0.4103 124.7951  -1.246  0.21527    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) lbl_t1 gendr1
## label_type1  0.078              
## gender1      0.315 -0.113       
## lbl_typ1:g1  0.049  0.445 -0.134
## convergence code: 0
## boundary (singular) fit: see ?isSingular

4.2 H2: Interaction effect

The preregistered hypothesis reads as follows: >This effect [of label types] may be stronger for plant-based food than for meat-based food.

Again, I’ll separate into confirmatory and exploratory sections.

4.2.1 Confirmatory

4.2.1.1 Simulations

Each combination of label type and food type was present for each participant, meaning that we should include a random effect for the interaction of label type and food type for pp as grouping factor.

For the food grouping, each food type could only be plant-based or meat-based, meaning we cannot include a random slope of food_type. That also means we cannot model the interaction. We counterbalanced which food was assigned what label type across participants. Therefore, we will only model label_type as random slope for the food grouping.

To obtain p-values later, for interactions and Type III tests, we need sum-to-zero contrasts.

# set contrasts
options(contrasts = c("contr.sum", "contr.poly"))

# run the model
# Maximal H2 model: label_type and food_type cross within participants,
# so their interaction varies by pp; each food has a fixed food_type,
# so only label_type can vary by food (see rationale in the text above).
h2_sim <-
  lme4::lmer(
    rating ~
      label_type * food_type +
      (1 + label_type * food_type | pp) +
      (1 + label_type | food),
    simulations
  )
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0815543
## (tol = 0.002, component 1)
summary(h2_sim)
## Linear mixed model fit by REML ['lmerMod']
## Formula: rating ~ label_type * food_type + (1 + label_type * food_type |  
##     pp) + (1 + label_type | food)
##    Data: simulations
## 
## REML criterion at convergence: 60895.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.5354 -0.6100  0.1119  0.6607  3.1025 
## 
## Random effects:
##  Groups   Name                   Variance Std.Dev. Corr             
##  pp       (Intercept)            145.4374 12.0597                   
##           label_type1             26.4819  5.1461   0.24            
##           food_type1              36.3587  6.0298  -0.13  0.11      
##           label_type1:food_type1   0.0455  0.2133  -0.15 -0.65 -0.27
##  food     (Intercept)             25.6014  5.0598                   
##           label_type1              3.8378  1.9590  0.02             
##  Residual                        489.5949 22.1268                   
## Number of obs: 6638, groups:  pp, 166; food, 40
## 
## Fixed effects:
##                        Estimate Std. Error t value
## (Intercept)             58.9897     1.2609  46.783
## label_type1             -3.5101     0.5738  -6.117
## food_type1               5.9134     0.9658   6.123
## label_type1:food_type1   0.4667     0.4123   1.132
## 
## Correlation of Fixed Effects:
##             (Intr) lbl_t1 fd_ty1
## label_type1  0.127              
## food_type1  -0.047  0.036       
## lbl_typ1:_1 -0.005 -0.018  0.005
## convergence code: 0
## Model failed to converge with max|grad| = 0.0815543 (tol = 0.002, component 1)

The model throws a convergence error. Looking at the summary, nothing really sticks out. Maybe some of the correlations are extremely low, so that might be problematic. Actually, since the newest update of lme4, the model fitting procedure has become more conservative. So I will follow the advice of Barr et al. (2013) and see whether I can get the warning to disappear.

I will start with simply increasing the number of iterations.

# First convergence-troubleshooting step: identical model, but raise the
# optimizer's iteration limit (maxfun) to an effectively unlimited value.
h2_sim.2 <-
  lme4::lmer(
    rating ~
      label_type * food_type +
      (1 + label_type * food_type | pp) +
      (1 + label_type | food),
    simulations,
    control = lmerControl(optCtrl = list(maxfun = 1e9))
  )
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0815543
## (tol = 0.002, component 1)

The model still gives a convergence error. Next, I will restart from previous fit.

# Second step: restart the optimizer from the previous fit's parameter
# values (random-effects theta and fixed effects).
starting_point <- getME(h2_sim.2, c("theta", "fixef"))
h2_sim.3 <- update(h2_sim.2, 
                      start = starting_point, 
                      control = lmerControl(optCtrl = list(maxfun = 1e9)))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0112122
## (tol = 0.002, component 1)

Next, I’ll try out different optimizers. About half of the optimizers lead to a converging model; the ones that converge yield a singular fit warning. Newer versions of lme4 throw this warning quite often, because the estimation has become more conservative. Overall, the fixed effects are (almost) identical, which gives confidence in the model. The same goes for the random effects, except for one correlation with bobyqa as optimizer, which would explain the singularity warning. Overall, I believe we can trust the estimates and treat the convergence warning as a false-positive.

h2_sim_model_all <- afex::all_fit(h2_sim.3,
                                  maxfun = 1e9)
## bobyqa. : [OK]
## Nelder_Mead. : [OK]
## optimx.nlminb : [ERROR]
## optimx.L-BFGS-B : [ERROR]
## nloptwrap.NLOPT_LN_NELDERMEAD : [OK]
## nloptwrap.NLOPT_LN_BOBYQA : [OK]
## nmkbw. : [ERROR]
# which ones converged
ok_fits <- sapply(h2_sim_model_all, is, "merMod")
ok_fits
##                       bobyqa.                  Nelder_Mead. 
##                          TRUE                          TRUE 
##                 optimx.nlminb               optimx.L-BFGS-B 
##                         FALSE                         FALSE 
## nloptwrap.NLOPT_LN_NELDERMEAD     nloptwrap.NLOPT_LN_BOBYQA 
##                          TRUE                          TRUE 
##                        nmkbw. 
##                         FALSE
# was fit okay?
!sapply(h2_sim_model_all, inherits, "try-error")
##                       bobyqa.                  Nelder_Mead. 
##                          TRUE                          TRUE 
##                 optimx.nlminb               optimx.L-BFGS-B 
##                          TRUE                          TRUE 
## nloptwrap.NLOPT_LN_NELDERMEAD     nloptwrap.NLOPT_LN_BOBYQA 
##                          TRUE                          TRUE 
##                        nmkbw. 
##                          TRUE
# compare fixed effects
ok_fits_details <- h2_sim_model_all[ok_fits]
t(sapply(ok_fits_details, "fixef"))
##                               (Intercept) label_type1 food_type1
## bobyqa.                          58.98971    -3.51006   5.913429
## Nelder_Mead.                     58.98971    -3.51006   5.913429
## nloptwrap.NLOPT_LN_NELDERMEAD    58.98971    -3.51006   5.913429
## nloptwrap.NLOPT_LN_BOBYQA        58.98971    -3.51006   5.913429
##                               label_type1:food_type1
## bobyqa.                                    0.4666743
## Nelder_Mead.                               0.4666743
## nloptwrap.NLOPT_LN_NELDERMEAD              0.4666722
## nloptwrap.NLOPT_LN_BOBYQA                  0.4666722
# compare random effects
t(sapply(ok_fits_details, getME, "theta"))
##                               pp.(Intercept) pp.label_type1.(Intercept)
## bobyqa.                            0.5448175                 0.05518278
## Nelder_Mead.                       0.5448158                 0.05518189
## nloptwrap.NLOPT_LN_NELDERMEAD      0.5447007                 0.05508740
## nloptwrap.NLOPT_LN_BOBYQA          0.5447007                 0.05508740
##                               pp.food_type1.(Intercept)
## bobyqa.                                     -0.03566608
## Nelder_Mead.                                -0.03566500
## nloptwrap.NLOPT_LN_NELDERMEAD               -0.03573493
## nloptwrap.NLOPT_LN_BOBYQA                   -0.03573493
##                               pp.label_type1:food_type1.(Intercept)
## bobyqa.                                                -0.001472848
## Nelder_Mead.                                           -0.001473771
## nloptwrap.NLOPT_LN_NELDERMEAD                          -0.001368736
## nloptwrap.NLOPT_LN_BOBYQA                              -0.001368736
##                               pp.label_type1 pp.food_type1.label_type1
## bobyqa.                            0.2260010                0.03880769
## Nelder_Mead.                       0.2260027                0.03880198
## nloptwrap.NLOPT_LN_NELDERMEAD      0.2259277                0.03880994
## nloptwrap.NLOPT_LN_BOBYQA          0.2259277                0.03880994
##                               pp.label_type1:food_type1.label_type1
## bobyqa.                                                -0.006057909
## Nelder_Mead.                                           -0.006058712
## nloptwrap.NLOPT_LN_NELDERMEAD                          -0.006141224
## nloptwrap.NLOPT_LN_BOBYQA                              -0.006141224
##                               pp.food_type1
## bobyqa.                           0.2674025
## Nelder_Mead.                      0.2674005
## nloptwrap.NLOPT_LN_NELDERMEAD     0.2673442
## nloptwrap.NLOPT_LN_BOBYQA         0.2673442
##                               pp.label_type1:food_type1.food_type1
## bobyqa.                                               -0.001787103
## Nelder_Mead.                                          -0.001787343
## nloptwrap.NLOPT_LN_NELDERMEAD                         -0.001774820
## nloptwrap.NLOPT_LN_BOBYQA                             -0.001774820
##                               pp.label_type1:food_type1 food.(Intercept)
## bobyqa.                                    0.000000e+00        0.2288721
## Nelder_Mead.                               3.034817e-05        0.2288742
## nloptwrap.NLOPT_LN_NELDERMEAD              6.067120e-04        0.2288415
## nloptwrap.NLOPT_LN_BOBYQA                  6.067120e-04        0.2288415
##                               food.label_type1.(Intercept)
## bobyqa.                                        0.001444044
## Nelder_Mead.                                   0.001444658
## nloptwrap.NLOPT_LN_NELDERMEAD                  0.001472892
## nloptwrap.NLOPT_LN_BOBYQA                      0.001472892
##                               food.label_type1
## bobyqa.                             0.08877145
## Nelder_Mead.                        0.08877055
## nloptwrap.NLOPT_LN_NELDERMEAD       0.08878641
## nloptwrap.NLOPT_LN_BOBYQA           0.08878641

Let’s inspect model diagnostics. They look good, with mostly normal residuals and a linear overall prediction. As for outliers, pp_91 and vegan_fillets stand out a bit, so I will see whether the results hold up with their exclusion.

# inspect standardized residuals
densityplot(resid(h2_sim, scaled = TRUE))

# q-q plot
car::qqPlot(resid(h2_sim, scaled = TRUE))

## 3469 6513 
## 3469 6511
# proportion of residuals in the extremes of the distribution
lmer_residuals(h2_sim)
## # A tibble: 3 x 4
##   standardized_cutoff proportion_cutoff proportion below_cutoff
##                 <dbl>             <dbl>      <dbl> <lgl>       
## 1                 2              0.05      0.0422  TRUE        
## 2                 2.5            0.01      0.0128  FALSE       
## 3                 3              0.0001    0.00286 FALSE
# obtain outliers
h2_sim_outliers_pp <- influence.ME::influence(h2_sim, c("pp"))
h2_sim_outliers_food <- influence.ME::influence(h2_sim, c("food"))

# plot formal outliers for pp
plot(h2_sim_outliers_pp, which = 'cook')

plot(h2_sim_outliers_pp, which = 'dfbetas')[1]

plot(h2_sim_outliers_pp, which = 'dfbetas')[2]

plot(h2_sim_outliers_pp, which = 'dfbetas')[3]

plot(h2_sim_outliers_pp, which = 'dfbetas')[4]

# which ones are the highest
arrange_cook(h2_sim_outliers_pp, "pp")
## # A tibble: 6 x 2
##   pp     cooks_distance
##   <chr>           <dbl>
## 1 pp_91         0.0206 
## 2 pp_103        0.0138 
## 3 pp_48         0.0136 
## 4 pp_12         0.0102 
## 5 pp_71         0.00854
## 6 pp_109        0.00815
# plot formal outliers for food
plot(h2_sim_outliers_food, which = 'cook')

plot(h2_sim_outliers_food, which = 'dfbetas')[1]

plot(h2_sim_outliers_food, which = 'dfbetas')[2]

plot(h2_sim_outliers_food, which = 'dfbetas')[3]

plot(h2_sim_outliers_food, which = 'dfbetas')[4]

# which ones are so far out on cook's distance?
arrange_cook(h2_sim_outliers_food, "food")
## # A tibble: 6 x 2
##   food             cooks_distance
##   <chr>                     <dbl>
## 1 vegan_fillets            0.0837
## 2 spring_rolls             0.0625
## 3 veggie_burger            0.0591
## 4 mushroom_burrito         0.0459
## 5 mint_soup                0.0429
## 6 veggie_quiche            0.0428
# plot the fitted vs. the residuals (to check for homo/heteroskedasticity) with added smoothed line.
plot(h2_sim, type = c('p', 'smooth'))

Unsurprisingly, we find two significant main effects, but no interaction: enhanced labels lead to more simulations; meat-based foods lead to more simulations; but the effect of label does not differ for food types.

options(contrasts = c("contr.sum", "contr.poly"))

# get p-value
# Type III tests of the H2 interaction model with Satterthwaite df.
# Contrasts were (re)set to sum-to-zero just above, as required for
# Type III tests with interactions.
h2_sim_p <-
  mixed(
    rating ~
    label_type * food_type +
    (1 + label_type * food_type | pp) +
    (1 + label_type | food),
    simulations,
    type = 3,
    method = "S"
  )
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# what is it
anova(h2_sim_p)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating ~ label_type * food_type + (1 + label_type * food_type | 
## Model:     pp) + (1 + label_type | food)
## Data: simulations
##                      num Df den Df       F    Pr(>F)    
## label_type                1 96.814 37.4192 2.016e-08 ***
## food_type                 1 62.301 37.4865 6.794e-08 ***
## label_type:food_type      1 36.309  1.2812    0.2651    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# effect size approximation
r.squaredGLMM(h2_sim)
##             R2m       R2c
## [1,] 0.06133328 0.3681661

4.2.1.2 Attractiveness

We run the same maximal model as with simulations. Unsurprisingly, the model yields a convergence error. I’ll go through the same trouble shooting steps as above.

# set contrasts
options(contrasts = c("contr.sum", "contr.poly"))

# run the model
# Same maximal H2 specification as for simulations, on the
# attractiveness ratings.
h2_attr <-
  lme4::lmer(
    rating ~
      label_type * food_type +
      (1 + label_type * food_type | pp) +
      (1 + label_type | food),
    attractiveness
  )

summary(h2_attr)
## Linear mixed model fit by REML ['lmerMod']
## Formula: rating ~ label_type * food_type + (1 + label_type * food_type |  
##     pp) + (1 + label_type | food)
##    Data: attractiveness
## 
## REML criterion at convergence: 61971.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.4478 -0.6575  0.1043  0.6899  3.2029 
## 
## Random effects:
##  Groups   Name                   Variance Std.Dev. Corr             
##  pp       (Intercept)            130.3474 11.4170                   
##           label_type1              7.2060  2.6844  -0.21            
##           food_type1              50.5751  7.1116  -0.09 -0.06      
##           label_type1:food_type1   0.3916  0.6257   0.14  0.64  0.64
##  food     (Intercept)             40.6063  6.3723                   
##           label_type1              5.5472  2.3553  0.28             
##  Residual                        585.3333 24.1937                   
## Number of obs: 6640, groups:  pp, 166; food, 40
## 
## Fixed effects:
##                        Estimate Std. Error t value
## (Intercept)             54.7621     1.3742  39.849
## label_type1             -1.6648     0.5199  -3.202
## food_type1               6.5680     1.1866   5.535
## label_type1:food_type1   0.1223     0.4788   0.255
## 
## Correlation of Fixed Effects:
##             (Intr) lbl_t1 fd_ty1
## label_type1  0.094              
## food_type1  -0.027 -0.012       
## lbl_typ1:_1  0.009  0.026  0.218
## convergence code: 0
## Model failed to converge with max|grad| = 0.0284037 (tol = 0.002, component 1)

Increasing the number of iterations still yields a warning.

# Same troubleshooting as for the simulations model: raise the
# optimizer's iteration limit via maxfun.
h2_attr.2 <-
  lme4::lmer(
    rating ~
      label_type * food_type +
      (1 + label_type * food_type | pp) +
      (1 + label_type | food),
    attractiveness,
    control = lmerControl(optCtrl = list(maxfun = 1e9))
  )
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0284037
## (tol = 0.002, component 1)

Restarting from previous fit does the same.

# Restart the attractiveness model from its previous fit's parameter
# values (random-effects theta and fixed effects).
# BUG FIX: the original called update(h2_sim.2, ...), i.e. it refit the
# *simulations* model from attractiveness starting values — note that
# the fixed effects printed by the subsequent all_fit() comparison were
# byte-identical to the simulations model's. h2_attr.3 must be derived
# from h2_attr.2.
starting_point <- getME(h2_attr.2, c("theta", "fixef"))
h2_attr.3 <- update(h2_attr.2,
                    start = starting_point,
                    control = lmerControl(optCtrl = list(maxfun = 1e9)))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0177895
## (tol = 0.002, component 1)

Next, I’ll try out different optimizers. The story is similar to the interaction model with simulations. The fixed effects are almost identical. There are slight differences in some of the random effects, but these seem minimal. Once again, I believe we can regard the convergence error as a false-positive.

# Try all available optimizers on the restarted attractiveness fit.
# NOTE(review): verify that h2_attr.3 was derived from h2_attr.2 and not
# from h2_sim.2 — the fixed effects printed below are identical to those
# of the h2_sim optimizer comparison above, which suggests the wrong
# model object was updated.
h2_attr_model_all <- afex::all_fit(h2_attr.3,
                                  maxfun = 1e9)
## bobyqa. : [OK]
## Nelder_Mead. : [OK]
## optimx.nlminb : [ERROR]
## optimx.L-BFGS-B : [ERROR]
## nloptwrap.NLOPT_LN_NELDERMEAD : [OK]
## nloptwrap.NLOPT_LN_BOBYQA : [OK]
## nmkbw. : [ERROR]
# which ones converged
ok_fits <- sapply(h2_attr_model_all, is, "merMod")
ok_fits
##                       bobyqa.                  Nelder_Mead. 
##                          TRUE                          TRUE 
##                 optimx.nlminb               optimx.L-BFGS-B 
##                         FALSE                         FALSE 
## nloptwrap.NLOPT_LN_NELDERMEAD     nloptwrap.NLOPT_LN_BOBYQA 
##                          TRUE                          TRUE 
##                        nmkbw. 
##                         FALSE
# was fit okay?
!sapply(h2_attr_model_all, inherits, "try-error")
##                       bobyqa.                  Nelder_Mead. 
##                          TRUE                          TRUE 
##                 optimx.nlminb               optimx.L-BFGS-B 
##                          TRUE                          TRUE 
## nloptwrap.NLOPT_LN_NELDERMEAD     nloptwrap.NLOPT_LN_BOBYQA 
##                          TRUE                          TRUE 
##                        nmkbw. 
##                          TRUE
# compare fixed effects
ok_fits_details <- h2_attr_model_all[ok_fits]
t(sapply(ok_fits_details, "fixef"))
##                               (Intercept) label_type1 food_type1
## bobyqa.                          58.98971   -3.510060   5.913429
## Nelder_Mead.                     58.98971   -3.510057   5.913437
## nloptwrap.NLOPT_LN_NELDERMEAD    58.98970   -3.510062   5.913428
## nloptwrap.NLOPT_LN_BOBYQA        58.98970   -3.510062   5.913428
##                               label_type1:food_type1
## bobyqa.                                    0.4666743
## Nelder_Mead.                               0.4666624
## nloptwrap.NLOPT_LN_NELDERMEAD              0.4666713
## nloptwrap.NLOPT_LN_BOBYQA                  0.4666713
# compare random effects
t(sapply(ok_fits_details, getME, "theta"))
##                               pp.(Intercept) pp.label_type1.(Intercept)
## bobyqa.                            0.5448176                 0.05518274
## Nelder_Mead.                       0.5454502                 0.05518476
## nloptwrap.NLOPT_LN_NELDERMEAD      0.5447853                 0.05516976
## nloptwrap.NLOPT_LN_BOBYQA          0.5447853                 0.05516976
##                               pp.food_type1.(Intercept)
## bobyqa.                                     -0.03566596
## Nelder_Mead.                                -0.03573160
## nloptwrap.NLOPT_LN_NELDERMEAD               -0.03580253
## nloptwrap.NLOPT_LN_BOBYQA                   -0.03580253
##                               pp.label_type1:food_type1.(Intercept)
## bobyqa.                                                -0.001472889
## Nelder_Mead.                                           -0.001280523
## nloptwrap.NLOPT_LN_NELDERMEAD                          -0.001312592
## nloptwrap.NLOPT_LN_BOBYQA                              -0.001312592
##                               pp.label_type1 pp.food_type1.label_type1
## bobyqa.                            0.2260011                0.03880775
## Nelder_Mead.                       0.2262993                0.03891334
## nloptwrap.NLOPT_LN_NELDERMEAD      0.2260459                0.03877229
## nloptwrap.NLOPT_LN_BOBYQA          0.2260459                0.03877229
##                               pp.label_type1:food_type1.label_type1
## bobyqa.                                                -0.006057891
## Nelder_Mead.                                           -0.006165599
## nloptwrap.NLOPT_LN_NELDERMEAD                          -0.006034952
## nloptwrap.NLOPT_LN_BOBYQA                              -0.006034952
##                               pp.food_type1
## bobyqa.                           0.2674023
## Nelder_Mead.                      0.2677056
## nloptwrap.NLOPT_LN_NELDERMEAD     0.2674238
## nloptwrap.NLOPT_LN_BOBYQA         0.2674238
##                               pp.label_type1:food_type1.food_type1
## bobyqa.                                               -0.001787118
## Nelder_Mead.                                          -0.001464064
## nloptwrap.NLOPT_LN_NELDERMEAD                         -0.001843422
## nloptwrap.NLOPT_LN_BOBYQA                             -0.001843422
##                               pp.label_type1:food_type1 food.(Intercept)
## bobyqa.                                    1.143763e-08        0.2288724
## Nelder_Mead.                               1.098753e-02        0.2296837
## nloptwrap.NLOPT_LN_NELDERMEAD              1.044228e-03        0.2289632
## nloptwrap.NLOPT_LN_BOBYQA                  1.044228e-03        0.2289632
##                               food.label_type1.(Intercept)
## bobyqa.                                        0.001444172
## Nelder_Mead.                                   0.001186154
## nloptwrap.NLOPT_LN_NELDERMEAD                  0.001188185
## nloptwrap.NLOPT_LN_BOBYQA                      0.001188185
##                               food.label_type1
## bobyqa.                             0.08877141
## Nelder_Mead.                        0.08934733
## nloptwrap.NLOPT_LN_NELDERMEAD       0.08875742
## nloptwrap.NLOPT_LN_BOBYQA           0.08875742

The model diagnostics look good, similar to the simulation model. No particular participant stands out as an outlier, except perhaps pp_59. As for foods, veggie_burger sticks out quite clearly. I’ll check whether the effects are robust to the exclusion of those two potential outliers.

# Model diagnostics for h2_attr: residual distribution, Q-Q plot,
# leave-one-out influence per participant and per food, and a
# fitted-vs-residuals plot to check homoskedasticity.

# inspect standardized residuals
densityplot(resid(h2_attr, scaled = TRUE))

# q-q plot
car::qqPlot(resid(h2_attr, scaled = TRUE))

## [1]  316 1141
# proportion of residuals in the extremes of the distribution
# (lmer_residuals is a custom helper defined earlier in this document)
lmer_residuals(h2_attr)
## # A tibble: 3 x 4
##   standardized_cutoff proportion_cutoff proportion below_cutoff
##                 <dbl>             <dbl>      <dbl> <lgl>       
## 1                 2              0.05     0.0358   TRUE        
## 2                 2.5            0.01     0.00828  TRUE        
## 3                 3              0.0001   0.000602 FALSE
# obtain outliers: refit the model leaving out each participant / each food
h2_attr_outliers_pp <- influence.ME::influence(h2_attr, c("pp"))
h2_attr_outliers_food <- influence.ME::influence(h2_attr, c("food"))

# plot formal outliers for pp (Cook's distance, then dfbetas per fixed effect)
plot(h2_attr_outliers_pp, which = 'cook')

plot(h2_attr_outliers_pp, which = 'dfbetas')[1]

plot(h2_attr_outliers_pp, which = 'dfbetas')[2]

plot(h2_attr_outliers_pp, which = 'dfbetas')[3]

plot(h2_attr_outliers_pp, which = 'dfbetas')[4]

# which participants have the highest Cook's distance
# (arrange_cook is a custom helper defined earlier in this document)
arrange_cook(h2_attr_outliers_pp, "pp")
## # A tibble: 6 x 2
##   pp     cooks_distance
##   <chr>           <dbl>
## 1 pp_59         0.0134 
## 2 pp_181        0.00940
## 3 pp_41         0.00909
## 4 pp_154        0.00887
## 5 pp_115        0.00744
## 6 pp_129        0.00730
# plot formal outliers for food
plot(h2_attr_outliers_food, which = 'cook')

plot(h2_attr_outliers_food, which = 'dfbetas')[1]

plot(h2_attr_outliers_food, which = 'dfbetas')[2]

plot(h2_attr_outliers_food, which = 'dfbetas')[3]

plot(h2_attr_outliers_food, which = 'dfbetas')[4]

# which ones are so far out on cook's distance?
arrange_cook(h2_attr_outliers_food, "food")
## # A tibble: 6 x 2
##   food            cooks_distance
##   <chr>                    <dbl>
## 1 veggie_burger           0.207 
## 2 spring_rolls            0.105 
## 3 noddles_veggies         0.0890
## 4 steam_bags              0.0488
## 5 salsa_pizza             0.0315
## 6 vegan_fillets           0.0303
# plot the fitted vs. the residuals (to check for homo/heteroskedasticity) with added smoothed line.
plot(h2_attr, type = c('p', 'smooth'))

As expected, the two main effects are significant, but the interaction is not.

# get p-value
# afex::mixed() computes Type-3 tests for the H2 attractiveness model;
# method = "S" uses Satterthwaite approximation for degrees of freedom
h2_attr_p <-
  mixed(
    rating ~
    label_type * food_type +
    (1 + label_type * food_type | pp) +
    (1 + label_type | food),
    attractiveness,
    type = 3,
    method = "S"
  )
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# what is it
anova(h2_attr_p)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating ~ label_type * food_type + (1 + label_type * food_type | 
## Model:     pp) + (1 + label_type | food)
## Data: attractiveness
##                      num Df den Df       F    Pr(>F)    
## label_type                1 48.821 10.2553    0.0024 ** 
## food_type                 1 60.109 30.6388 7.194e-07 ***
## label_type:food_type      1 37.648  0.0653    0.7998    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# approximate effect size
# R2m = variance explained by fixed effects only; R2c = fixed + random effects
r.squaredGLMM(h2_attr)
##             R2m       R2c
## [1,] 0.05304371 0.3240482

4.2.2 Exploratory

Once more, I run several robustness checks. I begin by including the eight people with a high RSI. The results don’t change.

# get p-value for simulations
# robustness check: refit the H2 simulation model with the previously
# excluded high-RSI participants merged back in via full_join
h2_sim_p_rsi <-
  mixed(
    rating ~
    label_type * food_type +
    (1 + label_type * food_type | pp) +
    (1 + label_type | food),
    data = full_join(
      simulations,
      high_rsi %>% 
        filter(measure == "simulations")
    ),
    type = 3,
    method = "S"
  )
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
anova(h2_sim_p_rsi)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating ~ label_type * food_type + (1 + label_type * food_type | 
## Model:     pp) + (1 + label_type | food)
## Data: full_join
## Data: simulations
## Data: high_rsi %>% filter(measure == "simulations")
##                      num Df  den Df       F    Pr(>F)    
## label_type                1 101.726 34.7022 5.004e-08 ***
## food_type                 1  61.898 36.7000 8.923e-08 ***
## label_type:food_type      1  36.251  1.1463    0.2914    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# get p-value for attractiveness
# same robustness check for the attractiveness model
h2_attr_p_rsi <-
  mixed(
    rating ~
    label_type * food_type +
    (1 + label_type * food_type | pp) +
    (1 + label_type | food),
    data = full_join(
      attractiveness,
      high_rsi %>% 
        filter(measure == "attractiveness")
    ),
    type = 3,
    method = "S"
  )
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
anova(h2_attr_p_rsi)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating ~ label_type * food_type + (1 + label_type * food_type | 
## Model:     pp) + (1 + label_type | food)
## Data: full_join
## Data: attractiveness
## Data: high_rsi %>% filter(measure == "attractiveness")
##                      num Df den Df       F    Pr(>F)    
## label_type                1 48.822 10.2343  0.002423 ** 
## food_type                 1 58.834 30.2673 8.594e-07 ***
## label_type:food_type      1 37.666  0.1282  0.722298    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The effects on simulations are robust to the exclusion of outliers.

# get p-value
# robustness check: refit the H2 simulation model excluding the
# influential participant (pp_91) and food (vegan_fillets) identified
# in the diagnostics above
h2_sim_no_outliers <-
  mixed(
    rating ~
    label_type * food_type +
    (1 + label_type * food_type | pp) +
    (1 + label_type | food),
    simulations %>% 
      filter(
        !pp %in% c("pp_91"),
        food != "vegan_fillets"
      ),
    type = 3,
    method = "S"
  )
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# what is it
anova(h2_sim_no_outliers)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating ~ label_type * food_type + (1 + label_type * food_type | 
## Model:     pp) + (1 + label_type | food)
## Data: %>%
## Data: simulations
## Data: filter(!pp %in% c("pp_91"), food != "vegan_fillets")
##                      num Df den Df       F    Pr(>F)    
## label_type                1 86.609 38.4520 1.853e-08 ***
## food_type                 1 62.669 38.0261 5.613e-08 ***
## label_type:food_type      1 35.623  1.8904    0.1777    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The same goes for effects on attractiveness: both main effects are robust to the exclusion of outliers.

# get p-value
# robustness check: refit the H2 attractiveness model excluding the
# influential participant (pp_59) and food (veggie_burger) identified
# in the diagnostics above
h2_attr_no_outliers <-
  mixed(
    rating ~
    label_type * food_type +
    (1 + label_type * food_type | pp) +
    (1 + label_type | food),
    attractiveness %>% 
      filter(
        !pp %in% c("pp_59"),
        food != "veggie_burger"
      ),
    type = 3,
    method = "S"
  )
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# what is it
anova(h2_attr_no_outliers)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating ~ label_type * food_type + (1 + label_type * food_type | 
## Model:     pp) + (1 + label_type | food)
## Data: %>%
## Data: attractiveness
## Data: filter(!pp %in% c("pp_59"), food != "veggie_burger")
##                      num Df den Df       F    Pr(>F)    
## label_type                1 48.665  9.9206  0.002793 ** 
## food_type                 1 57.807 28.7523 1.503e-06 ***
## label_type:food_type      1 36.915  0.1557  0.695462    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Last, I check whether excluding the two alleged vegetarians changes the results.

# simulations
# robustness check: refit the H2 simulation model excluding the two
# participants suspected to be vegetarians (pp_30, pp_63)
h2_sim_no_vegetarians <-
  mixed(
    rating ~
    label_type * food_type +
    (1 + label_type * food_type | pp) +
    (1 + label_type | food),
    simulations %>% 
      filter(!pp %in% c("pp_30", "pp_63")),
    type = 3,
    method = "S"
  )
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# what is it
anova(h2_sim_no_vegetarians)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating ~ label_type * food_type + (1 + label_type * food_type | 
## Model:     pp) + (1 + label_type | food)
## Data: %>%
## Data: simulations
## Data: filter(!pp %in% c("pp_30", "pp_63"))
##                      num Df den Df       F    Pr(>F)    
## label_type                1 97.721 37.1950 2.141e-08 ***
## food_type                 1 62.160 37.9902 5.828e-08 ***
## label_type:food_type      1 36.540  1.3706    0.2493    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# attractiveness
# same exclusion for the attractiveness model
h2_attr_no_vegetarians <-
  mixed(
    rating ~
    label_type * food_type +
    (1 + label_type * food_type | pp) +
    (1 + label_type | food),
    attractiveness %>% 
      filter(!pp %in% c("pp_30", "pp_63")),
    type = 3,
    method = "S"
  )
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# what is it
anova(h2_attr_no_vegetarians)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating ~ label_type * food_type + (1 + label_type * food_type | 
## Model:     pp) + (1 + label_type | food)
## Data: %>%
## Data: attractiveness
## Data: filter(!pp %in% c("pp_30", "pp_63"))
##                      num Df den Df       F    Pr(>F)    
## label_type                1 48.601  9.7809  0.002977 ** 
## food_type                 1 58.664 31.9011 4.994e-07 ***
## label_type:food_type      1 37.449  0.0682  0.795467    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4.3 H3: Simulations predict attractiveness

The preregistered hypothesis reads as follows: “Across foods, simulation ratings will predict attractiveness ratings.”

4.3.1 Confirmatory

Currently, the working file follows tidyverse conventions, with one observation per row. That means each participant has two rows per trial, one for their rating on simulations, one for their rating on attractiveness. The variable measure shows to what measure the rating belongs.

To predict attractiveness from simulations, the ratings need to be on the same row, meaning a separate rating variable for both measures. So far, I split up working_file into a simulations file and an attractiveness file. I now combine those two to have both ratings per trial.

Unfortunately, this is not super-straightforward: Both measures were presented on their own page, meaning that all variables relating to time (e.g., page_time_median) will be different in the two data sets. We won’t need those for the merging, so I’ll remove any variables that aren’t identical across the two data sets (except for the ratings, of course). This is basically the same as spreading/pivoting working_file to wide format along the measure variable, but at this point I’ve done so many transformations etc. that removing and merging is easier.

# Prepare the two rating data sets for a one-row-per-trial merge.
# The page-timing variables (and the now-redundant measure column) differ
# between the two measures, so they are dropped before joining; each
# rating column gets a measure-specific name so both fit on one row.

# simulation ratings, without measure/timing columns
sim_merge <-
  simulations %>%
  select(
    -measure,
    -time,
    -page_time_median,
    -contains("speed_factor")
  ) %>%
  rename(simulation_rating = rating)

# attractiveness ratings, without measure/timing columns
attr_merge <-
  attractiveness %>%
  select(
    -measure,
    -time,
    -page_time_median,
    -contains("speed_factor")
  ) %>%
  rename(attractiveness_rating = rating)

# join on all remaining shared columns, then place the two ratings
# next to each other (purely cosmetic reordering)
wide_data <-
  sim_merge %>%
  left_join(attr_merge) %>%
  select(
    pp:simulation_rating,
    attractiveness_rating,
    everything()
  )

Next, we predict attractiveness with simulations, whilst controlling for label and food type. The simulation rating is continuous, so I’ll center it. The model fails to converge.

# what's the raw correlation
# pairwise.complete.obs: drop trials where either rating is missing
cor(wide_data$simulation_rating, wide_data$attractiveness_rating, use = "pairwise.complete.obs")
## [1] 0.5046872
# center predictor (mean-center only; no scaling yet)
wide_data <-
  wide_data %>% 
  mutate(
    simulation_rating_c = scale(simulation_rating, center = TRUE, scale = FALSE)
  )

# sanity check: trial counts per food and label condition
wide_data %>% 
  count(food, label_type)
## # A tibble: 80 x 3
##    food            label_type     n
##    <fct>           <fct>      <int>
##  1 chicken_balti   control       84
##  2 chicken_balti   enhanced      82
##  3 pepperoni_pizza control       82
##  4 pepperoni_pizza enhanced      84
##  5 empanadas       control       82
##  6 empanadas       enhanced      84
##  7 steak_pie       control       82
##  8 steak_pie       enhanced      84
##  9 pasta_bake      control       84
## 10 pasta_bake      enhanced      82
## # ... with 70 more rows
# set contrasts (sum-to-zero, as required for Type-3 tests)
options(contrasts = c("contr.sum", "contr.poly"))

# run the model: attractiveness predicted by (centered) simulation rating,
# controlling for label and food type, with maximal random slopes
h3_main <-
  lme4::lmer(
    attractiveness_rating ~
      simulation_rating_c + label_type + food_type +
      (1 + simulation_rating_c + label_type + food_type | pp) +
      (1 + simulation_rating_c + label_type | food),
    wide_data
  )
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.598793
## (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?
# NOTE(review): this chunk errors, as the output below shows —
# attractiveness_rating_s and simulation_rating_s do not exist yet;
# simulation_rating_s is only created in the standardization step further
# down, and attractiveness_rating_s is never created at all. This looks
# like a leftover draft chunk — confirm whether it should be removed.
sim_attr <- 
  lme4::lmer(
    attractiveness_rating_s ~
      simulation_rating_s +
      (1 + simulation_rating_s | pp) +
      (1 + simulation_rating_s | food),
    wide_data
  )
## Error in eval(predvars, data, env): object 'attractiveness_rating_s' not found
summary(sim_attr)
## Error in summary(sim_attr): object 'sim_attr' not found

Often, it can help to standardize continuous predictors. The model still throws a convergence warning, so I’ll continue with the same troubleshooting steps as previously.

# standardize simulation rating (z-score: centered and scaled)
wide_data <-
  wide_data %>% 
  mutate(
    simulation_rating_s = scale(simulation_rating, scale = TRUE)
  )

# run the model with the standardized predictor
h3_main_s <-
  lme4::lmer(
    attractiveness_rating ~
      simulation_rating_s + label_type + food_type +
      (1 + simulation_rating_s + label_type + food_type | pp) +
      (1 + simulation_rating_s + label_type | food),
    wide_data
  )
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0131649
## (tol = 0.002, component 1)
summary(h3_main_s)
## Linear mixed model fit by REML ['lmerMod']
## Formula: 
## attractiveness_rating ~ simulation_rating_s + label_type + food_type +  
##     (1 + simulation_rating_s + label_type + food_type | pp) +  
##     (1 + simulation_rating_s + label_type | food)
##    Data: wide_data
## 
## REML criterion at convergence: 60541.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.9463 -0.5759  0.0826  0.6397  3.8189 
## 
## Random effects:
##  Groups   Name                Variance Std.Dev. Corr             
##  pp       (Intercept)          82.237   9.068                    
##           simulation_rating_s  29.865   5.465    0.10            
##           label_type1           4.135   2.033   -0.38  0.57      
##           food_type1           28.101   5.301   -0.08 -0.08 -0.04
##  food     (Intercept)          17.020   4.126                    
##           simulation_rating_s   1.955   1.398   -0.26            
##           label_type1           2.610   1.615    0.33  0.61      
##  Residual                     471.768  21.720                    
## Number of obs: 6638, groups:  pp, 166; food, 40
## 
## Fixed effects:
##                     Estimate Std. Error t value
## (Intercept)          54.7725     1.0052  54.489
## simulation_rating_s  12.3703     0.5971  20.716
## label_type1          -0.1942     0.4087  -0.475
## food_type1            3.9987     0.7925   5.046
## 
## Correlation of Fixed Effects:
##             (Intr) smlt__ lbl_t1
## smltn_rtng_ -0.019              
## label_type1  0.033  0.365       
## food_type1  -0.039 -0.073 -0.018
## convergence code: 0
## Model failed to converge with max|grad| = 0.0131649 (tol = 0.002, component 1)

Increase number of iterations. Still receive a warning.

# run the model again with a much higher iteration cap for the optimizer;
# the gradient check still fails at the same value, so iterations were
# not the limiting factor
h3_main_s.2 <-
  lme4::lmer(
    attractiveness_rating ~
      simulation_rating_s + label_type + food_type +
      (1 + simulation_rating_s + label_type + food_type | pp) +
      (1 + simulation_rating_s + label_type | food),
    wide_data,
    control = lmerControl(optCtrl = list(maxfun = 1e9))
  )
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.0131649
## (tol = 0.002, component 1)
summary(h3_main_s.2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: 
## attractiveness_rating ~ simulation_rating_s + label_type + food_type +  
##     (1 + simulation_rating_s + label_type + food_type | pp) +  
##     (1 + simulation_rating_s + label_type | food)
##    Data: wide_data
## Control: lmerControl(optCtrl = list(maxfun = 1e+09))
## 
## REML criterion at convergence: 60541.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.9463 -0.5759  0.0826  0.6397  3.8189 
## 
## Random effects:
##  Groups   Name                Variance Std.Dev. Corr             
##  pp       (Intercept)          82.237   9.068                    
##           simulation_rating_s  29.865   5.465    0.10            
##           label_type1           4.135   2.033   -0.38  0.57      
##           food_type1           28.101   5.301   -0.08 -0.08 -0.04
##  food     (Intercept)          17.020   4.126                    
##           simulation_rating_s   1.955   1.398   -0.26            
##           label_type1           2.610   1.615    0.33  0.61      
##  Residual                     471.768  21.720                    
## Number of obs: 6638, groups:  pp, 166; food, 40
## 
## Fixed effects:
##                     Estimate Std. Error t value
## (Intercept)          54.7725     1.0052  54.489
## simulation_rating_s  12.3703     0.5971  20.716
## label_type1          -0.1942     0.4087  -0.475
## food_type1            3.9987     0.7925   5.046
## 
## Correlation of Fixed Effects:
##             (Intr) smlt__ lbl_t1
## smltn_rtng_ -0.019              
## label_type1  0.033  0.365       
## food_type1  -0.039 -0.073 -0.018
## convergence code: 0
## Model failed to converge with max|grad| = 0.0131649 (tol = 0.002, component 1)

Restart from previous fit. Same warning.

# run the model once more, restarting the optimizer from the previous
# fit's parameter estimates (theta and fixef); the gradient shrinks
# (0.0131649 -> 0.00572005) but stays above the 0.002 tolerance
starting_point <- getME(h3_main_s.2, c("theta", "fixef"))
h3_main_s.3 <- update(h3_main_s.2, 
                      start = starting_point, 
                      control = lmerControl(optCtrl = list(maxfun = 1e9)))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00572005
## (tol = 0.002, component 1)
summary(h3_main_s.3)
## Linear mixed model fit by REML ['lmerMod']
## Formula: 
## attractiveness_rating ~ simulation_rating_s + label_type + food_type +  
##     (1 + simulation_rating_s + label_type + food_type | pp) +  
##     (1 + simulation_rating_s + label_type | food)
##    Data: wide_data
## Control: lmerControl(optCtrl = list(maxfun = 1e+09))
## 
## REML criterion at convergence: 60541.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.9461 -0.5759  0.0823  0.6397  3.8187 
## 
## Random effects:
##  Groups   Name                Variance Std.Dev. Corr             
##  pp       (Intercept)          82.289   9.071                    
##           simulation_rating_s  29.879   5.466    0.10            
##           label_type1           4.139   2.034   -0.38  0.57      
##           food_type1           28.091   5.300   -0.08 -0.08 -0.04
##  food     (Intercept)          17.024   4.126                    
##           simulation_rating_s   1.948   1.396   -0.26            
##           label_type1           2.605   1.614    0.33  0.61      
##  Residual                     471.760  21.720                    
## Number of obs: 6638, groups:  pp, 166; food, 40
## 
## Fixed effects:
##                     Estimate Std. Error t value
## (Intercept)          54.7726     1.0054  54.478
## simulation_rating_s  12.3703     0.5971  20.718
## label_type1          -0.1943     0.4086  -0.475
## food_type1            3.9991     0.7924   5.047
## 
## Correlation of Fixed Effects:
##             (Intr) smlt__ lbl_t1
## smltn_rtng_ -0.019              
## label_type1  0.032  0.365       
## food_type1  -0.039 -0.073 -0.018
## convergence code: 0
## Model failed to converge with max|grad| = 0.00572005 (tol = 0.002, component 1)

Like previously, I check whether the estimates are consistent across optimizers. Once again, the estimates are virtually identical across optimizers.

# refit the final H3 model with every optimizer afex knows about, to check
# whether the marginal convergence warning actually affects the estimates
h3_main_model_s_all <- afex::all_fit(h3_main_s.3,
                                  maxfun = 1e9)
## bobyqa. : [OK]
## Nelder_Mead. : [OK]
## optimx.nlminb : [ERROR]
## optimx.L-BFGS-B : [ERROR]
## nloptwrap.NLOPT_LN_NELDERMEAD :
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00572005
## (tol = 0.002, component 1)
## [OK]
## nloptwrap.NLOPT_LN_BOBYQA :
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00572005
## (tol = 0.002, component 1)
## [OK]
## nmkbw. : [ERROR]
# which ones converged to a valid merMod fit
ok_fits <- sapply(h3_main_model_s_all, is, "merMod")
ok_fits
##                       bobyqa.                  Nelder_Mead. 
##                          TRUE                          TRUE 
##                 optimx.nlminb               optimx.L-BFGS-B 
##                         FALSE                         FALSE 
## nloptwrap.NLOPT_LN_NELDERMEAD     nloptwrap.NLOPT_LN_BOBYQA 
##                          TRUE                          TRUE 
##                        nmkbw. 
##                         FALSE
# was fit okay? (TRUE means the optimizer did not throw a hard error)
!sapply(h3_main_model_s_all, inherits, "try-error")
##                       bobyqa.                  Nelder_Mead. 
##                          TRUE                          TRUE 
##                 optimx.nlminb               optimx.L-BFGS-B 
##                          TRUE                          TRUE 
## nloptwrap.NLOPT_LN_NELDERMEAD     nloptwrap.NLOPT_LN_BOBYQA 
##                          TRUE                          TRUE 
##                        nmkbw. 
##                          TRUE
# compare fixed effects across the optimizers that produced a fit
ok_fits_details <- h3_main_model_s_all[ok_fits]
t(sapply(ok_fits_details, "fixef"))
##                               (Intercept) simulation_rating_s label_type1
## bobyqa.                          54.77261            12.37032  -0.1942594
## Nelder_Mead.                     54.77261            12.37033  -0.1942573
## nloptwrap.NLOPT_LN_NELDERMEAD    54.77259            12.37034  -0.1942530
## nloptwrap.NLOPT_LN_BOBYQA        54.77259            12.37034  -0.1942530
##                               food_type1
## bobyqa.                         3.999129
## Nelder_Mead.                    3.999130
## nloptwrap.NLOPT_LN_NELDERMEAD   3.999085
## nloptwrap.NLOPT_LN_BOBYQA       3.999085
# compare random effects (theta = Cholesky factors of the RE covariance)
t(sapply(ok_fits_details, getME, "theta"))
##                               pp.(Intercept)
## bobyqa.                            0.4176410
## Nelder_Mead.                       0.4176448
## nloptwrap.NLOPT_LN_NELDERMEAD      0.4176480
## nloptwrap.NLOPT_LN_BOBYQA          0.4176480
##                               pp.simulation_rating_s.(Intercept)
## bobyqa.                                               0.02396929
## Nelder_Mead.                                          0.02397201
## nloptwrap.NLOPT_LN_NELDERMEAD                         0.02398978
## nloptwrap.NLOPT_LN_BOBYQA                             0.02398978
##                               pp.label_type1.(Intercept)
## bobyqa.                                      -0.03580145
## Nelder_Mead.                                 -0.03580073
## nloptwrap.NLOPT_LN_NELDERMEAD                -0.03580506
## nloptwrap.NLOPT_LN_BOBYQA                    -0.03580506
##                               pp.food_type1.(Intercept)
## bobyqa.                                     -0.01897510
## Nelder_Mead.                                -0.01897814
## nloptwrap.NLOPT_LN_NELDERMEAD               -0.01894625
## nloptwrap.NLOPT_LN_BOBYQA                   -0.01894625
##                               pp.simulation_rating_s
## bobyqa.                                    0.2505218
## Nelder_Mead.                               0.2505221
## nloptwrap.NLOPT_LN_NELDERMEAD              0.2505170
## nloptwrap.NLOPT_LN_BOBYQA                  0.2505170
##                               pp.label_type1.simulation_rating_s
## bobyqa.                                               0.05688584
## Nelder_Mead.                                          0.05688757
## nloptwrap.NLOPT_LN_NELDERMEAD                         0.05687374
## nloptwrap.NLOPT_LN_BOBYQA                             0.05687374
##                               pp.food_type1.simulation_rating_s
## bobyqa.                                             -0.01795389
## Nelder_Mead.                                        -0.01795366
## nloptwrap.NLOPT_LN_NELDERMEAD                       -0.01807340
## nloptwrap.NLOPT_LN_BOBYQA                           -0.01807340
##                               pp.label_type1 pp.food_type1.label_type1
## bobyqa.                           0.06523339              -0.008715632
## Nelder_Mead.                      0.06523336              -0.008717828
## nloptwrap.NLOPT_LN_NELDERMEAD     0.06524478              -0.008691725
## nloptwrap.NLOPT_LN_BOBYQA         0.06524478              -0.008691725
##                               pp.food_type1 food.(Intercept)
## bobyqa.                           0.2424429        0.1899555
## Nelder_Mead.                      0.2424367        0.1899572
## nloptwrap.NLOPT_LN_NELDERMEAD     0.2424520        0.1899649
## nloptwrap.NLOPT_LN_BOBYQA         0.2424520        0.1899649
##                               food.simulation_rating_s.(Intercept)
## bobyqa.                                                -0.01689705
## Nelder_Mead.                                           -0.01689668
## nloptwrap.NLOPT_LN_NELDERMEAD                          -0.01689815
## nloptwrap.NLOPT_LN_BOBYQA                              -0.01689815
##                               food.label_type1.(Intercept)
## bobyqa.                                         0.02422455
## Nelder_Mead.                                    0.02422853
## nloptwrap.NLOPT_LN_NELDERMEAD                   0.02421897
## nloptwrap.NLOPT_LN_BOBYQA                       0.02421897
##                               food.simulation_rating_s
## bobyqa.                                     0.06199573
## Nelder_Mead.                                0.06199344
## nloptwrap.NLOPT_LN_NELDERMEAD               0.06199703
## nloptwrap.NLOPT_LN_BOBYQA                   0.06199703
##                               food.label_type1.simulation_rating_s
## bobyqa.                                                 0.05362565
## Nelder_Mead.                                            0.05362015
## nloptwrap.NLOPT_LN_NELDERMEAD                           0.05357641
## nloptwrap.NLOPT_LN_BOBYQA                               0.05357641
##                               food.label_type1
## bobyqa.                             0.04542658
## Nelder_Mead.                        0.04542826
## nloptwrap.NLOPT_LN_NELDERMEAD       0.04544769
## nloptwrap.NLOPT_LN_BOBYQA           0.04544769

Next, I inspect the model diagnostics. pp_59 could be an outlier. As for foods, spring_rolls and veggie_burger are potential outliers.

# Model diagnostics for h3_main: residual distribution, Q-Q plot,
# leave-one-out influence per participant and per food, and a
# fitted-vs-residuals plot to check homoskedasticity.

# inspect standardized residuals
densityplot(resid(h3_main, scaled = TRUE))

# q-q plot
car::qqPlot(resid(h3_main, scaled = TRUE))

##  316 5213 
##  316 5212
# proportion of residuals in the extremes of the distribution
# (lmer_residuals is a custom helper defined earlier in this document)
lmer_residuals(h3_main)
## # A tibble: 3 x 4
##   standardized_cutoff proportion_cutoff proportion below_cutoff
##                 <dbl>             <dbl>      <dbl> <lgl>       
## 1                 2              0.05      0.0461  TRUE        
## 2                 2.5            0.01      0.0140  FALSE       
## 3                 3              0.0001    0.00286 FALSE
# obtain outliers: refit the model leaving out each participant / each food
h3_main_outliers_pp <- influence.ME::influence(h3_main, c("pp"))
h3_main_outliers_food <- influence.ME::influence(h3_main, c("food"))

# plot formal outliers for pp (Cook's distance, then dfbetas per fixed effect)
plot(h3_main_outliers_pp, which = 'cook')

plot(h3_main_outliers_pp, which = 'dfbetas')[1]

plot(h3_main_outliers_pp, which = 'dfbetas')[2]

plot(h3_main_outliers_pp, which = 'dfbetas')[3]

plot(h3_main_outliers_pp, which = 'dfbetas')[4]

# which participants have the highest Cook's distance
arrange_cook(h3_main_outliers_pp, "pp")
## # A tibble: 6 x 2
##   pp     cooks_distance
##   <chr>           <dbl>
## 1 pp_59          0.0254
## 2 pp_126         0.0187
## 3 pp_39          0.0169
## 4 pp_66          0.0152
## 5 pp_181         0.0138
## 6 pp_12          0.0135
# plot formal outliers for food
plot(h3_main_outliers_food, which = 'cook')

plot(h3_main_outliers_food, which = 'dfbetas')[1]

plot(h3_main_outliers_food, which = 'dfbetas')[2]

plot(h3_main_outliers_food, which = 'dfbetas')[3]

plot(h3_main_outliers_food, which = 'dfbetas')[4]

# which ones are so far out on cook's distance?
arrange_cook(h3_main_outliers_food, "food")
## # A tibble: 6 x 2
##   food            cooks_distance
##   <chr>                    <dbl>
## 1 spring_rolls            0.128 
## 2 veggie_burger           0.115 
## 3 pepperoni_pizza         0.0792
## 4 noddles_veggies         0.0584
## 5 steam_bags              0.0425
## 6 chicken_breast          0.0391
# plot the fitted vs. the residuals (to check for homo/heteroskedasticity) with added smoothed line.
plot(h3_main, type = c('p', 'smooth'))

Once we include simulations as predictor, labels don’t have an effect on attractiveness anymore. This can have several explanations. Simulation and attractiveness ratings are highly correlated, meaning that simulations explain so much variance that there is little left to explain for the label type. It could also be an issue of multicollinearity. It is also possible that simulations mediate the effect of label type. However, attractiveness was assessed before simulations, ruling out causal mediation.

# get p-value
# afex::mixed() refit of the H3 model to obtain Satterthwaite p-values
# for the fixed effects
h3_main_p <-
  mixed(
    attractiveness_rating ~
    simulation_rating_s + label_type + food_type + 
    (1 + simulation_rating_s + label_type + food_type | pp) +
    (1 + simulation_rating_s + label_type | food),
    wide_data,
    type = 3,
    method = "S"
  )
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# what is it
summary(h3_main_p)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## attractiveness_rating ~ simulation_rating_s + label_type + food_type +  
##     (1 + simulation_rating_s + label_type + food_type | pp) +  
##     (1 + simulation_rating_s + label_type | food)
##    Data: data
## 
## REML criterion at convergence: 60541.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.9463 -0.5759  0.0826  0.6397  3.8189 
## 
## Random effects:
##  Groups   Name                Variance Std.Dev. Corr             
##  pp       (Intercept)          82.237   9.068                    
##           simulation_rating_s  29.865   5.465    0.10            
##           label_type1           4.135   2.033   -0.38  0.57      
##           food_type1           28.101   5.301   -0.08 -0.08 -0.04
##  food     (Intercept)          17.020   4.126                    
##           simulation_rating_s   1.955   1.398   -0.26            
##           label_type1           2.610   1.615    0.33  0.61      
##  Residual                     471.768  21.720                    
## Number of obs: 6638, groups:  pp, 166; food, 40
## 
## Fixed effects:
##                     Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)          54.7725     1.0052 115.1940  54.489  < 2e-16 ***
## simulation_rating_s  12.3703     0.5971 130.5529  20.716  < 2e-16 ***
## label_type1          -0.1942     0.4087  47.3314  -0.475    0.637    
## food_type1            3.9987     0.7925  68.2239   5.046 3.57e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) smlt__ lbl_t1
## smltn_rtng_ -0.019              
## label_type1  0.033  0.365       
## food_type1  -0.039 -0.073 -0.018
## convergence code: 0
## Model failed to converge with max|grad| = 0.0131649 (tol = 0.002, component 1)
# approximate effect size
# R2m = variance explained by fixed effects only; R2c = fixed + random effects
r.squaredGLMM(h3_main)
##            R2m       R2c
## [1,] 0.2310311 0.4287556

4.3.2 Exploratory

Once more, I run several robustness checks. I begin with including the eight people with a high RSI. For that, I need to merge the high-rsi cases with the analysis file. The results don’t change.

# Prepare the high-RSI cases for merging with the analysis file:
# drop the timing variables (already removed from wide_data), reshape so
# each measure becomes its own column, and align column names with wide_data.
rsi_merge <-
  high_rsi %>% 
  select( # remove the same time variables
    -time, 
    -page_time_median, 
    -contains("speed_factor")
  ) %>% 
  pivot_wider( # turn into wide format
    names_from = measure,
    values_from = rating
  ) %>% 
  rename( # same names as in data_wide
    attractiveness_rating = attractiveness,
    simulation_rating = simulations
  )
  
# Robustness check: refit the H3 model with the eight high-RSI participants
# included. The data argument joins the high-RSI cases onto wide_data and
# re-standardizes simulation_rating on the combined sample.
h3_main_p_rsi <-
  mixed(
    attractiveness_rating ~
    simulation_rating_s + label_type + food_type +
    (1 + simulation_rating_s + label_type + food_type | pp) +
    (1 + simulation_rating_s + label_type | food),
    data = left_join(
      wide_data %>% 
        select(
          pp:rsi # so we have the same columns in both data sets, didn't do transformations (centering etc.) for high_rsi data set
        ),
      high_rsi
    ) %>% 
      mutate(
        simulation_rating_s = scale(simulation_rating, scale = TRUE)
      ),
    type = 3,
    method = "S"
  )
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
anova(h3_main_p_rsi)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: attractiveness_rating ~ simulation_rating_s + label_type + food_type + 
## Model:     (1 + simulation_rating_s + label_type + food_type | pp) + 
## Model:     (1 + simulation_rating_s + label_type | food)
## Data: %>%
## Data: left_join(wide_data %>% select(pp:rsi), high_rsi)
## Data: mutate(simulation_rating_s = scale(simulation_rating, scale = TRUE))
##                     num Df  den Df        F    Pr(>F)    
## simulation_rating_s      1 130.553 429.1439 < 2.2e-16 ***
## label_type               1  47.331   0.2259    0.6368    
## food_type                1  68.224  25.4600 3.571e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The effects on simulations are robust to the exclusion of outliers.

# Robustness check: refit the H3 model after excluding the influential
# participant (pp_59) and the two influential foods identified via Cook's
# distance above (spring_rolls, veggie_burger).
h3_main_no_outliers <-
  mixed(
    attractiveness_rating ~
    simulation_rating_s + label_type + food_type +
    (1 + simulation_rating_s + label_type + food_type | pp) +
    (1 + simulation_rating_s + label_type | food),
    wide_data %>% 
      filter(
        !pp %in% c("pp_59"),
        !food %in% c("spring_rolls", "veggie_burger")
      ),
    type = 3,
    method = "S"
  )
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# what is it
anova(h3_main_no_outliers)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: attractiveness_rating ~ simulation_rating_s + label_type + food_type + 
## Model:     (1 + simulation_rating_s + label_type + food_type | pp) + 
## Model:     (1 + simulation_rating_s + label_type | food)
## Data: %>%
## Data: wide_data
## Data: filter(!pp %in% c("pp_59"), !food %in% c("spring_rolls", "veggie_burger"))
##                     num Df  den Df        F    Pr(>F)    
## simulation_rating_s      1 127.311 414.6296 < 2.2e-16 ***
## label_type               1  51.375   0.0271    0.8698    
## food_type                1  76.167  35.8727 6.499e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The effects are also robust to the exclusion of the two alleged vegetarians.

# Robustness check: refit the H3 model after excluding the two participants
# suspected of being vegetarians (pp_30, pp_63).
h3_main_no_vegetarians <-
  mixed(
    attractiveness_rating ~
    simulation_rating_s + label_type + food_type +
    (1 + simulation_rating_s + label_type + food_type | pp) +
    (1 + simulation_rating_s + label_type | food),
    wide_data %>% 
      filter(!pp %in% c("pp_30", "pp_63")),
    type = 3,
    method = "S"
  )
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# what is it
anova(h3_main_no_vegetarians)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: attractiveness_rating ~ simulation_rating_s + label_type + food_type + 
## Model:     (1 + simulation_rating_s + label_type + food_type | pp) + 
## Model:     (1 + simulation_rating_s + label_type | food)
## Data: %>%
## Data: wide_data
## Data: filter(!pp %in% c("pp_30", "pp_63"))
##                     num Df  den Df        F    Pr(>F)    
## simulation_rating_s      1 129.983 421.2665 < 2.2e-16 ***
## label_type               1  47.485   0.1891    0.6656    
## food_type                1  65.550  27.2611 1.967e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Next, we want to know whether simulation ratings interact with either of the factors. I begin with adding an interaction term of simulation_rating_s and label_type. Actually, we don’t even need to fit a model, the two slopes are almost identical (although this doesn’t take the nested nature into account), backed up by the low estimate and p-value.

# Visual check of the simulation x label-type interaction:
# one lm-smoothed regression line per label type (ignores nesting).
ggplot(wide_data,
       aes(
         x = simulation_rating_s, 
         y = attractiveness_rating
        )
      )+ 
  geom_smooth(method = "lm", aes(color = label_type))

# set sum-to-zero contrasts so Type-3 tests are interpretable
options(contrasts = c("contr.sum", "contr.poly"))

# Fit the interaction model; the simulation x label-type term is also
# entered as a random slope for both grouping factors (pp, food).
h3_interaction_label_type <-
  mixed(
    attractiveness_rating ~
      simulation_rating_s*label_type + food_type +
      (1 + simulation_rating_s*label_type + food_type | pp) +
      (1 + simulation_rating_s*label_type | food),
    wide_data,
    type = 3,
    method = "S"
  )
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
summary(h3_interaction_label_type)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## attractiveness_rating ~ simulation_rating_s * label_type + food_type +  
##     (1 + simulation_rating_s * label_type + food_type | pp) +  
##     (1 + simulation_rating_s * label_type | food)
##    Data: data
## 
## REML criterion at convergence: 60531.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.9158 -0.5767  0.0802  0.6392  3.7989 
## 
## Random effects:
##  Groups   Name                            Variance Std.Dev. Corr       
##  pp       (Intercept)                      84.4495  9.1896             
##           simulation_rating_s              30.3198  5.5063   0.08      
##           label_type1                       2.2216  1.4905  -0.53  0.80
##           food_type1                       27.9621  5.2879  -0.07 -0.07
##           simulation_rating_s:label_type1   1.7575  1.3257   0.36 -0.43
##  food     (Intercept)                      17.3046  4.1599             
##           simulation_rating_s               2.0992  1.4489  -0.24      
##           label_type1                       3.1256  1.7679   0.35  0.54
##           simulation_rating_s:label_type1   0.6853  0.8278   0.19  0.42
##  Residual                                 470.9667 21.7018             
##             
##             
##             
##             
##   0.03      
##  -0.53  0.24
##             
##             
##             
##   0.87      
##             
## Number of obs: 6638, groups:  pp, 166; food, 40
## 
## Fixed effects:
##                                 Estimate Std. Error       df t value
## (Intercept)                      54.8173     1.0171 114.7815  53.895
## simulation_rating_s              12.3134     0.6034 128.7992  20.407
## label_type1                      -0.2526     0.4132  42.8465  -0.611
## food_type1                        4.0344     0.7930  68.0733   5.088
## simulation_rating_s:label_type1   0.3558     0.3365  43.9662   1.057
##                                 Pr(>|t|)    
## (Intercept)                      < 2e-16 ***
## simulation_rating_s              < 2e-16 ***
## label_type1                        0.544    
## food_type1                      3.05e-06 ***
## simulation_rating_s:label_type1    0.296    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) smlt__ lbl_t1 fd_ty1
## smltn_rtng_ -0.029                     
## label_type1  0.048  0.373              
## food_type1  -0.036 -0.069 -0.004       
## smltn_r_:_1  0.174 -0.087  0.170  0.069
## convergence code: 0
## unable to evaluate scaled gradient
## Model failed to converge: degenerate  Hessian with 1 negative eigenvalues
anova(h3_interaction_label_type)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: attractiveness_rating ~ simulation_rating_s * label_type + food_type + 
## Model:     (1 + simulation_rating_s * label_type + food_type | pp) + 
## Model:     (1 + simulation_rating_s * label_type | food)
## Data: wide_data
##                                num Df  den Df        F    Pr(>F)    
## simulation_rating_s                 1 128.799 416.4533 < 2.2e-16 ***
## label_type                          1  42.846   0.3736    0.5443    
## food_type                           1  68.073  25.8858 3.054e-06 ***
## simulation_rating_s:label_type      1  43.966   1.1177    0.2962    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

For food type, the plot shows the two main effects, but no interaction. This is backed up by the model.

# Visual check of the simulation x food-type interaction:
# one lm-smoothed regression line per food type (ignores nesting).
ggplot(wide_data,
       aes(
         x = simulation_rating_s, 
         y = attractiveness_rating
        )
      )+ 
  geom_smooth(method = "lm", aes(color = food_type))

# set sum-to-zero contrasts so Type-3 tests are interpretable
options(contrasts = c("contr.sum", "contr.poly"))

# Fit the interaction model. Note the by-food random part carries only the
# simulation slope: food_type cannot vary within a food, so it (and its
# interaction) is not a valid by-food random slope.
h3_interaction_food_type <-
  mixed(
    attractiveness_rating ~
      simulation_rating_s*food_type + label_type +
      (1 + simulation_rating_s*food_type + label_type | pp) +
      (1 + simulation_rating_s | food),
    wide_data,
    type = 3,
    method = "S"
  )
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
summary(h3_interaction_food_type)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## attractiveness_rating ~ simulation_rating_s * food_type + label_type +  
##     (1 + simulation_rating_s * food_type + label_type | pp) +  
##     (1 + simulation_rating_s | food)
##    Data: data
## 
## REML criterion at convergence: 60535.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.0041 -0.5849  0.0733  0.6389  3.6820 
## 
## Random effects:
##  Groups   Name                           Variance Std.Dev. Corr       
##  pp       (Intercept)                     84.129   9.172              
##           simulation_rating_s             30.488   5.522    0.08      
##           food_type1                      27.616   5.255   -0.13 -0.05
##           label_type1                      4.028   2.007   -0.38  0.56
##           simulation_rating_s:food_type1   2.059   1.435   -0.41  0.53
##  food     (Intercept)                     16.253   4.032              
##           simulation_rating_s              1.279   1.131   -0.35      
##  Residual                                472.981  21.748              
##             
##             
##             
##             
##  -0.01      
##   0.73  0.39
##             
##             
##             
## Number of obs: 6638, groups:  pp, 166; food, 40
## 
## Fixed effects:
##                                Estimate Std. Error       df t value
## (Intercept)                     54.5797     1.0035 120.0325  54.391
## simulation_rating_s             12.4766     0.5858 131.4600  21.298
## food_type1                       3.7513     0.8089  65.0207   4.638
## label_type1                     -0.1728     0.3182 155.7765  -0.543
## simulation_rating_s:food_type1   0.4745     0.3813  60.6047   1.244
##                                Pr(>|t|)    
## (Intercept)                     < 2e-16 ***
## simulation_rating_s             < 2e-16 ***
## food_type1                     1.75e-05 ***
## label_type1                       0.588    
## simulation_rating_s:food_type1    0.218    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) smlt__ fd_ty1 lbl_t1
## smltn_rtng_ -0.031                     
## food_type1  -0.044 -0.073              
## label_type1 -0.126  0.289 -0.013       
## smltn_r_:_1 -0.148  0.157 -0.023  0.049
## convergence code: 0
## boundary (singular) fit: see ?isSingular
anova(h3_interaction_food_type)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: attractiveness_rating ~ simulation_rating_s * food_type + label_type + 
## Model:     (1 + simulation_rating_s * food_type + label_type | pp) + 
## Model:     (1 + simulation_rating_s | food)
## Data: wide_data
##                               num Df  den Df        F    Pr(>F)    
## simulation_rating_s                1 131.460 453.6241 < 2.2e-16 ***
## food_type                          1  65.021  21.5095  1.75e-05 ***
## label_type                         1 155.776   0.2949    0.5879    
## simulation_rating_s:food_type      1  60.605   1.5487    0.2181    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Simulations predicted attractiveness. As another exploratory analysis, I’ll check whether the reverse also holds. With the bobyqa optimizer, the model converges without any warnings. All effects are significant.

# set sum-to-zero contrasts so Type-3 tests are interpretable
options(contrasts = c("contr.sum", "contr.poly"))

# Standardize attractiveness so it can serve as the predictor in the
# reversed model (mirrors the earlier standardization of simulation_rating).
wide_data <-
  wide_data %>% 
  mutate(
    attractiveness_rating_s = scale(attractiveness_rating, scale = TRUE)
  )

# Reverse model: predict simulations from attractiveness. The bobyqa
# optimizer is used because it lets this model converge without warnings.
h3_attr_sim <-
  mixed(
    simulation_rating ~
      attractiveness_rating_s + label_type + food_type +
      (1 + attractiveness_rating_s + label_type + food_type | pp) +
      (1 + attractiveness_rating_s + label_type | food),
    wide_data,
    type = 3,
    method = "S",
    control = lmerControl(optimizer = "bobyqa")
  )
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
summary(h3_attr_sim)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## simulation_rating ~ attractiveness_rating_s + label_type + food_type +  
##     (1 + attractiveness_rating_s + label_type + food_type | pp) +  
##     (1 + attractiveness_rating_s + label_type | food)
##    Data: data
## Control: lmerControl(optimizer = "bobyqa")
## 
## REML criterion at convergence: 59303.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.0200 -0.5599  0.0780  0.6029  3.5699 
## 
## Random effects:
##  Groups   Name                    Variance Std.Dev. Corr             
##  pp       (Intercept)             104.6306 10.2289                   
##           attractiveness_rating_s  42.8622  6.5469  -0.24            
##           label_type1              21.8451  4.6739   0.22  0.15      
##           food_type1               11.2453  3.3534  -0.02  0.17  0.20
##  food     (Intercept)               7.2903  2.7001                   
##           attractiveness_rating_s   0.8557  0.9251  -0.69            
##           label_type1               1.5548  1.2469  -0.19  0.45      
##  Residual                         380.2797 19.5008                   
## Number of obs: 6638, groups:  pp, 166; food, 40
## 
## Fixed effects:
##                         Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)              58.7917     0.9392 168.3816  62.597  < 2e-16 ***
## attractiveness_rating_s  11.2914     0.6144 149.6092  18.379  < 2e-16 ***
## label_type1              -2.8914     0.4793 107.8881  -6.032 2.31e-08 ***
## food_type1                3.2147     0.5430  55.9004   5.920 2.06e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) attr__ lbl_t1
## attrctvns__ -0.252              
## label_type1  0.105  0.158       
## food_type1  -0.021  0.012  0.069

4.4 H4: Intention to reduce meat

The preregistered hypothesis reads as follows: >The intention to reduce meat will be associated positively with attractiveness ratings for the plant-based foods, and negatively with desire (sic: attractiveness) for meat-based foods.

For this hypothesis, I run an interaction model, predicting attractiveness from the interaction of food_type and intention to reduce meat eating (change_diet).

4.4.1 Confirmatory

Indeed, there is a positive raw relation between the intention to reduce eating meat and attractiveness ratings for plant-based foods. For meat-based foods, this relation looks flat. The model throws a convergence error, but that disappears when using the bobyqa optimizer.

# Standardize the intention-to-change-diet predictor (z-scores).
attractiveness <-
  attractiveness %>% 
  mutate(
    change_diet_s = scale(change_diet, scale = TRUE)
  )

# Raw (non-nested) visual check: one lm-smoothed line per food type,
# y-axis clamped to the 0-100 rating scale.
ggplot(attractiveness,
       aes(
         x = change_diet_s, 
         y = rating
        )
      )+ 
  geom_smooth(method = "lm", aes(color = food_type)) + 
  ylim(0, 100)

# set sum-to-zero contrasts so Type-3 tests are interpretable
options(contrasts = c("contr.sum", "contr.poly"))

# H4 model: attractiveness predicted by the intention x food-type
# interaction, with a by-pp food-type slope and a food intercept.
# bobyqa removes the convergence warning mentioned in the text above.
h4_attr <- lme4::lmer(
  rating ~
    change_diet_s * food_type +
    (1 + food_type | pp) +
    (1 | food),
  attractiveness,
  control = lmerControl(optimizer = "bobyqa")
)

# inspect model
summary(h4_attr)
## Linear mixed model fit by REML ['lmerMod']
## Formula: rating ~ change_diet_s * food_type + (1 + food_type | pp) + (1 |  
##     food)
##    Data: attractiveness
## Control: lmerControl(optimizer = "bobyqa")
## 
## REML criterion at convergence: 62019
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2246 -0.6922  0.1051  0.7014  3.2292 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  pp       (Intercept) 123.80   11.127        
##           food_type1   44.62    6.680   -0.01
##  food     (Intercept)  40.50    6.364        
##  Residual             601.45   24.525        
## Number of obs: 6640, groups:  pp, 166; food, 40
## 
## Fixed effects:
##                          Estimate Std. Error t value
## (Intercept)               54.7616     1.3598  40.273
## change_diet_s              2.5847     0.9146   2.826
## food_type1                 6.5694     1.1713   5.609
## change_diet_s:food_type1  -2.4460     0.5995  -4.080
## 
## Correlation of Fixed Effects:
##             (Intr) chng__ fd_ty1
## change_dt_s  0.000              
## food_type1  -0.004  0.000       
## chng_dt_:_1  0.000 -0.011  0.000

pp_59 and pp_129 are potential influential cases. There are two potential outliers among the foods: haddock and spring_rolls.

# Density of the standardized (scaled) residuals — check for normality.
densityplot(resid(h4_attr, scaled = TRUE))

# Q-Q plot of the same residuals; car::qqPlot also flags extreme points.
car::qqPlot(resid(h4_attr, scaled = TRUE))

## [1] 5497  316
# proportion of residuals in the extremes of the distribution
lmer_residuals(h4_attr)
## # A tibble: 3 x 4
##   standardized_cutoff proportion_cutoff proportion below_cutoff
##                 <dbl>             <dbl>      <dbl> <lgl>       
## 1                 2              0.05     0.0370   TRUE        
## 2                 2.5            0.01     0.00783  TRUE        
## 3                 3              0.0001   0.000452 FALSE
# Influence diagnostics: iteratively refit h4_attr leaving out each level of
# pp and of food, respectively (influence.ME).
h4_attr_outliers_pp <- influence.ME::influence(h4_attr, c("pp"))
h4_attr_outliers_food <- influence.ME::influence(h4_attr, c("food"))

# Cook's distance and DFBETAS (one panel per fixed effect) for participants.
plot(h4_attr_outliers_pp, which = 'cook')

plot(h4_attr_outliers_pp, which = 'dfbetas')[1]

plot(h4_attr_outliers_pp, which = 'dfbetas')[2]

plot(h4_attr_outliers_pp, which = 'dfbetas')[3]

plot(h4_attr_outliers_pp, which = 'dfbetas')[4]

# Rank participants by Cook's distance (custom helper defined earlier).
arrange_cook(h4_attr_outliers_pp, "pp")
## # A tibble: 6 x 2
##   pp     cooks_distance
##   <chr>           <dbl>
## 1 pp_59          0.0529
## 2 pp_129         0.0432
## 3 pp_53          0.0382
## 4 pp_63          0.0275
## 5 pp_178         0.0264
## 6 pp_158         0.0233
# Cook's distance and DFBETAS (one panel per fixed effect) for foods.
plot(h4_attr_outliers_food, which = 'cook')

plot(h4_attr_outliers_food, which = 'dfbetas')[1]

plot(h4_attr_outliers_food, which = 'dfbetas')[2]

plot(h4_attr_outliers_food, which = 'dfbetas')[3]

plot(h4_attr_outliers_food, which = 'dfbetas')[4]

# Rank foods by Cook's distance (custom helper defined earlier).
arrange_cook(h4_attr_outliers_food, "food")
## # A tibble: 6 x 2
##   food           cooks_distance
##   <chr>                   <dbl>
## 1 haddock                0.0666
## 2 spring_rolls           0.0666
## 3 salsa_pizza            0.0422
## 4 vegan_fillets          0.0336
## 5 sausoyges              0.0324
## 6 fishless_cakes         0.0242
# plot the fitted vs. the residuals (to check for homo/heteroskedasticity) with added smoothed line.
plot(h4_attr, type = c('p', 'smooth'))

Next, we see whether the interaction is significant. It is, so I also estimate the simple slopes, which show the expected stronger association for plant-based compared to meat-based foods.

# Refit the H4 model with afex::mixed() for Type-3, Satterthwaite p-values;
# same formula and bobyqa optimizer as h4_attr above.
h4_attr_p <- mixed(
  rating ~
    change_diet_s * food_type +
    (1 + food_type | pp) +
    (1 | food),
  attractiveness,
  type = 3,
  method = "S",
  control = lmerControl(optimizer = "bobyqa")
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
summary(h4_attr_p)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: rating ~ change_diet_s * food_type + (1 + food_type | pp) + (1 |  
##     food)
##    Data: data
## Control: lmerControl(optimizer = "bobyqa")
## 
## REML criterion at convergence: 62019
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2246 -0.6922  0.1051  0.7014  3.2292 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  pp       (Intercept) 123.80   11.127        
##           food_type1   44.62    6.680   -0.01
##  food     (Intercept)  40.50    6.364        
##  Residual             601.45   24.525        
## Number of obs: 6640, groups:  pp, 166; food, 40
## 
## Fixed effects:
##                          Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)               54.7616     1.3598  94.1965  40.273  < 2e-16 ***
## change_diet_s              2.5847     0.9146 164.0000   2.826   0.0053 ** 
## food_type1                 6.5694     1.1713  57.3627   5.609 6.14e-07 ***
## change_diet_s:food_type1  -2.4460     0.5995 164.0001  -4.080 7.02e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) chng__ fd_ty1
## change_dt_s  0.000              
## food_type1  -0.004  0.000       
## chng_dt_:_1  0.000 -0.011  0.000
anova(h4_attr_p)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating ~ change_diet_s * food_type + (1 + food_type | pp) + (1 | 
## Model:     food)
## Data: attractiveness
##                         num Df  den Df       F    Pr(>F)    
## change_diet_s                1 164.000  7.9867  0.005298 ** 
## food_type                    1  57.363 31.4556 6.136e-07 ***
## change_diet_s:food_type      1 164.000 16.6452 7.019e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# simple slopes
emtrends(
  h4_attr,
  pairwise ~ food_type,
  var = "change_diet_s"
)
## $emtrends
##  food_type   change_diet_s.trend   SE  df asymp.LCL asymp.UCL
##  meat_based                0.139 1.09 Inf     -1.99      2.27
##  plant_based               5.031 1.10 Inf      2.88      7.19
## 
## Degrees-of-freedom method: asymptotic 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast                 estimate  SE  df z.ratio p.value
##  meat_based - plant_based    -4.89 1.2 Inf -4.080  <.0001 
## 
## Degrees-of-freedom method: asymptotic
# get approximate effect size
r.squaredGLMM(h4_attr)
##             R2m      R2c
## [1,] 0.06445011 0.305648

4.4.2 Exploratory

Just as before, I check whether the inclusion of those with a high RSI changes anything about the results. It doesn’t.

# Robustness check: refit the H4 model with the high-RSI participants
# included (full_join adds their attractiveness trials; their predictor is
# standardized within the high-RSI subset, as noted earlier).
h4_attr_p_rsi <-
  mixed(
    rating ~
    change_diet_s * food_type +
    (1 + food_type | pp) +
    (1 | food),
    data = full_join( # add high rsi cases
      attractiveness,
      high_rsi %>% 
        filter(measure == "attractiveness") %>% 
        mutate(change_diet_s = scale(change_diet, scale = TRUE))
    ),
    type = 3,
    method = "S",
    control = lmerControl(optimizer = "bobyqa")
  )
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
anova(h4_attr_p_rsi)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating ~ change_diet_s * food_type + (1 + food_type | pp) + (1 | 
## Model:     food)
## Data: full_join
## Data: attractiveness
## Data: high_rsi %>% filter(measure == "attractiveness") %>% mutate(change_diet_s = scale(change_diet, scale = TRUE))
##                         num Df  den Df       F    Pr(>F)    
## change_diet_s                1 172.000  9.4235   0.00249 ** 
## food_type                    1  56.208 31.0611 7.366e-07 ***
## change_diet_s:food_type      1 172.000 17.2397 5.177e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

As a next step, I see whether the results are robust to the exclusion of the outliers I identified above. The effect remains significant.

# Robustness check: refit the H4 model excluding the influential
# participants (pp_59, pp_129) and foods (haddock, spring_rolls)
# identified via Cook's distance above.
h4_attr_no_outliers <- mixed(
  rating ~
    change_diet_s * food_type +
    (1 + food_type | pp) +
    (1 | food),
  attractiveness %>% 
    filter(!pp %in% c("pp_59", "pp_129")) %>% 
    filter(!food %in% c("haddock", "spring_rolls")),
  type = 3,
  method = "S",
  control = lmerControl(optimizer = "bobyqa")
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
anova(h4_attr_no_outliers)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating ~ change_diet_s * food_type + (1 + food_type | pp) + (1 | 
## Model:     food)
## Data: %>%
## Data: attractiveness %>% filter(!pp %in% c("pp_59", "pp_129"))
## Data: filter(!food %in% c("haddock", "spring_rolls"))
##                         num Df  den Df      F    Pr(>F)    
## change_diet_s                1 162.000  8.141  0.004893 ** 
## food_type                    1  59.779 42.179 1.834e-08 ***
## change_diet_s:food_type      1 162.000 21.307 7.923e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The effects are also robust to the exclusion of the two alleged vegetarians.

# Robustness check: refit the H4 model excluding the two participants
# suspected of being vegetarians (pp_30, pp_63).
h4_attr_no_vegetarians <- mixed(
  rating ~
    change_diet_s * food_type +
    (1 + food_type | pp) +
    (1 | food),
  attractiveness %>% 
    filter(!pp %in% c("pp_30", "pp_63")),
  type = 3,
  method = "S",
  control = lmerControl(optimizer = "bobyqa")
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
anova(h4_attr_no_vegetarians)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating ~ change_diet_s * food_type + (1 + food_type | pp) + (1 | 
## Model:     food)
## Data: %>%
## Data: attractiveness
## Data: filter(!pp %in% c("pp_30", "pp_63"))
##                         num Df  den Df       F    Pr(>F)    
## change_diet_s                1 162.000  7.7278 0.0060816 ** 
## food_type                    1  56.368 32.3769 4.742e-07 ***
## change_diet_s:food_type      1 162.000 14.2653 0.0002226 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4.5 Further exploratory tests

Out of interest, I’d like to see the interaction in H4 with meat eating frequency.

# Standardize weekly meat-eating frequency (z-scores).
attractiveness <-
  attractiveness %>% 
  mutate(
    meat_per_week_s = scale(meat_per_week, scale = TRUE)
  )

# Raw visual check: one lm-smoothed line per food type, 0-100 scale.
ggplot(attractiveness,
       aes(
         x = meat_per_week_s, 
         y = rating
        )
      )+ 
  geom_smooth(method = "lm", aes(color = food_type)) + 
  ylim(0, 100)

# set sum-to-zero contrasts so Type-3 tests are interpretable
options(contrasts = c("contr.sum", "contr.poly"))

# Exploratory model: H4 structure with meat-eating frequency replacing
# the intention-to-change-diet predictor (default lmer optimizer).
exp_attr_meat <- lme4::lmer(
  rating ~
    meat_per_week_s * food_type +
    (1 + food_type | pp) +
    (1 | food),
  attractiveness)

summary(exp_attr_meat)
## Linear mixed model fit by REML ['lmerMod']
## Formula: rating ~ meat_per_week_s * food_type + (1 + food_type | pp) +  
##     (1 | food)
##    Data: attractiveness
## 
## REML criterion at convergence: 62015.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2016 -0.6797  0.1084  0.6989  3.2461 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  pp       (Intercept) 129.85   11.395        
##           food_type1   41.24    6.422   -0.07
##  food     (Intercept)  40.51    6.365        
##  Residual             601.45   24.525        
## Number of obs: 6640, groups:  pp, 166; food, 40
## 
## Fixed effects:
##                            Estimate Std. Error t value
## (Intercept)                 54.7616     1.3732  39.880
## meat_per_week_s             -0.8373     0.9343  -0.896
## food_type1                   6.5694     1.1626   5.650
## meat_per_week_s:food_type1   3.0546     0.5823   5.246
## 
## Correlation of Fixed Effects:
##             (Intr) mt_p__ fd_ty1
## met_pr_wk_s  0.000              
## food_type1  -0.018  0.000       
## mt_pr_w_:_1  0.000 -0.054  0.000
# Refit with afex::mixed() for Type-3, Satterthwaite p-values.
exp_attr_meat_p <- mixed(
  rating ~
    meat_per_week_s * food_type +
    (1 + food_type | pp) +
    (1 | food),
  attractiveness,
  type = 3,
  method = "S")
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
anova(exp_attr_meat_p)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating ~ meat_per_week_s * food_type + (1 + food_type | pp) + 
## Model:     (1 | food)
## Data: attractiveness
##                           num Df  den Df      F    Pr(>F)    
## meat_per_week_s                1 164.006  0.803    0.3715    
## food_type                      1  55.804 31.927 5.643e-07 ***
## meat_per_week_s:food_type      1 163.992 27.519 4.749e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# simple slopes
emtrends(
  exp_attr_meat_p,
  pairwise ~ food_type,
  var = "meat_per_week_s"
)
## $emtrends
##  food_type   meat_per_week_s.trend   SE  df asymp.LCL asymp.UCL
##  meat_based                   2.22 1.07 Inf     0.112      4.32
##  plant_based                 -3.89 1.13 Inf    -6.101     -1.68
## 
## Degrees-of-freedom method: asymptotic 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast                 estimate   SE  df z.ratio p.value
##  meat_based - plant_based     6.11 1.16 Inf 5.246   <.0001 
## 
## Degrees-of-freedom method: asymptotic
# get approximate effect size
r.squaredGLMM(exp_attr_meat)
##             R2m       R2c
## [1,] 0.06140852 0.3056774

Out of curiosity, I run the same analysis for simulations.

# standardize the predictor
simulations <-
  simulations %>% 
  mutate(
    meat_per_week_s = scale(meat_per_week, scale = TRUE)
  )

# plot: linear trend of simulation rating on standardized weekly meat
# intake, drawn separately for meat-based and plant-based foods
simulations %>%
  ggplot(aes(x = meat_per_week_s, y = rating)) +
  geom_smooth(aes(color = food_type), method = "lm") +
  ylim(0, 100)

# set sum-to-zero contrasts so Type 3 tests / main effects are interpretable
options(contrasts = c("contr.sum", "contr.poly"))

# fit model: simulation rating ~ meat intake x food type, with by-participant
# random intercepts and food-type slopes, and a random intercept per food item
exp_sim_meat <- lme4::lmer(
  rating ~
    meat_per_week_s * food_type +
    (1 + food_type | pp) +
    (1 | food),
  simulations)

# inspect estimates, random-effect variances, and t-values
summary(exp_sim_meat)
## Linear mixed model fit by REML ['lmerMod']
## Formula: rating ~ meat_per_week_s * food_type + (1 + food_type | pp) +  
##     (1 | food)
##    Data: simulations
## 
## REML criterion at convergence: 61210.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3937 -0.6574  0.1366  0.6861  3.1171 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  pp       (Intercept) 143.03   11.959        
##           food_type1   31.11    5.577   -0.09
##  food     (Intercept)  25.64    5.064        
##  Residual             534.14   23.111        
## Number of obs: 6638, groups:  pp, 166; food, 40
## 
## Fixed effects:
##                            Estimate Std. Error t value
## (Intercept)                 58.9889     1.2582  46.883
## meat_per_week_s             -1.4737     0.9707  -1.518
## food_type1                   5.9120     0.9533   6.201
## meat_per_week_s:food_type1   2.1085     0.5176   4.073
## 
## Correlation of Fixed Effects:
##             (Intr) mt_p__ fd_ty1
## met_pr_wk_s  0.000              
## food_type1  -0.031  0.000       
## mt_pr_w_:_1  0.000 -0.075  0.000
# get p-values: same model refit with afex::mixed
# (Type 3 tests, Satterthwaite degrees of freedom)
exp_sim_meat_p <- mixed(
  rating ~
    meat_per_week_s * food_type +
    (1 + food_type | pp) +
    (1 | food),
  simulations,
  type = 3,
  method = "S")
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# ANOVA table with F-tests and p-values for the fixed effects
anova(exp_sim_meat_p)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating ~ meat_per_week_s * food_type + (1 + food_type | pp) + 
## Model:     (1 | food)
## Data: simulations
##                           num Df  den Df       F    Pr(>F)    
## meat_per_week_s                1 164.018  2.3049    0.1309    
## food_type                      1  58.431 38.4574 6.173e-08 ***
## meat_per_week_s:food_type      1 164.044 16.5918 7.200e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# simple slopes: slope of rating on meat_per_week_s per food type,
# and the contrast between the two slopes
emtrends(
  exp_sim_meat_p,
  pairwise ~ food_type,
  var = "meat_per_week_s"
)
## $emtrends
##  food_type   meat_per_week_s.trend   SE  df asymp.LCL asymp.UCL
##  meat_based                  0.635 1.07 Inf     -1.45      2.72
##  plant_based                -3.582 1.13 Inf     -5.80     -1.36
## 
## Degrees-of-freedom method: asymptotic 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast                 estimate   SE  df z.ratio p.value
##  meat_based - plant_based     4.22 1.04 Inf 4.073   <.0001 
## 
## Degrees-of-freedom method: asymptotic
# approximate effect size: marginal (R2m) and conditional (R2c) R-squared
r.squaredGLMM(exp_sim_meat)
##             R2m       R2c
## [1,] 0.05360835 0.3112228

Another interest: Do labels help frequent meat eaters in finding plant-based foods more attractive? Specifically, do enhanced labels reduce the association between frequency of eating meat and simulations/attractiveness (only plant-based).

I check that with attractiveness. Indeed, the more meat people eat, the less attractive they find plant-based foods, but this negative association is weaker for enhanced food labels.

# plot: attractiveness of plant-based foods as a function of standardized
# weekly meat intake, with separate trends per label type
attractiveness %>%
  filter(food_type == "plant_based") %>%
  ggplot(aes(x = meat_per_week_s, y = rating)) +
  geom_smooth(aes(color = label_type), method = "lm") +
  ylim(0, 100)

# set sum-to-zero contrasts (needed for interpretable Type 3 tests)
options(contrasts = c("contr.sum", "contr.poly"))

# fit model on plant-based trials only: attractiveness ~ meat intake x label
# type, with random label-type slopes for both participants and food items
exp_attr_meat_labels <- lme4::lmer(
  rating ~
    meat_per_week_s * label_type +
    (1 + label_type | pp) +
    (1 + label_type | food),
  attractiveness %>% 
    filter(food_type == "plant_based"))

# inspect estimates, random-effect variances, and t-values
summary(exp_attr_meat_labels)
## Linear mixed model fit by REML ['lmerMod']
## Formula: rating ~ meat_per_week_s * label_type + (1 + label_type | pp) +  
##     (1 + label_type | food)
##    Data: attractiveness %>% filter(food_type == "plant_based")
## 
## REML criterion at convergence: 31037.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.9383 -0.7090  0.0365  0.7153  3.2246 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  pp       (Intercept) 181.148  13.459        
##           label_type1   2.544   1.595   -0.40
##  food     (Intercept)  50.322   7.094        
##           label_type1  10.627   3.260   0.33 
##  Residual             595.413  24.401        
## Number of obs: 3320, groups:  pp, 166; food, 20
## 
## Fixed effects:
##                             Estimate Std. Error t value
## (Intercept)                  48.1951     1.9460  24.767
## meat_per_week_s              -3.8988     1.1274  -3.458
## label_type1                  -1.7861     0.8521  -2.096
## meat_per_week_s:label_type1  -1.2626     0.4414  -2.860
## 
## Correlation of Fixed Effects:
##             (Intr) mt_p__ lbl_t1
## met_pr_wk_s  0.000              
## label_type1  0.199  0.000       
## mt_pr_w_:_1  0.000 -0.103  0.000
# get p-values: refit with afex::mixed (Type 3, Satterthwaite df)
exp_attr_meat_labels_p <- mixed(
  rating ~
    meat_per_week_s * label_type +
    (1 + label_type | pp) +
    (1 + label_type | food),
  attractiveness %>% 
    filter(food_type == "plant_based"),
  type = 3,
  method = "S")
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# ANOVA table with F-tests and p-values for the fixed effects
anova(exp_attr_meat_labels_p)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating ~ meat_per_week_s * label_type + (1 + label_type | pp) + 
## Model:     (1 + label_type | food)
## Data: %>%
## Data: attractiveness
## Data: filter(food_type == "plant_based")
##                            num Df  den Df       F    Pr(>F)    
## meat_per_week_s                 1 163.881 11.9587 0.0006931 ***
## label_type                      1  19.155  4.3939 0.0495763 *  
## meat_per_week_s:label_type      1 163.119  8.1817 0.0047856 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# simple slopes: slope of rating on meat_per_week_s per label type,
# and the contrast between the two slopes
emtrends(
  exp_attr_meat_labels_p,
  pairwise ~ label_type,
  var = "meat_per_week_s"
)
## $emtrends
##  label_type meat_per_week_s.trend   SE  df asymp.LCL asymp.UCL
##  control                    -5.16 1.17 Inf     -7.45    -2.873
##  enhanced                   -2.64 1.25 Inf     -5.09    -0.182
## 
## Degrees-of-freedom method: asymptotic 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast           estimate    SE  df z.ratio p.value
##  control - enhanced    -2.53 0.883 Inf -2.860  0.0042 
## 
## Degrees-of-freedom method: asymptotic
# approximate effect size: marginal (R2m) and conditional (R2c) R-squared
r.squaredGLMM(exp_attr_meat_labels)
##             R2m       R2c
## [1,] 0.02324102 0.3076937

The pattern looks similar for simulations, but the interaction is not significant.

# plot: simulation ratings of plant-based foods as a function of
# standardized weekly meat intake, with separate trends per label type
simulations %>%
  filter(food_type == "plant_based") %>%
  ggplot(aes(x = meat_per_week_s, y = rating)) +
  geom_smooth(aes(color = label_type), method = "lm") +
  ylim(0, 100)

# set sum-to-zero contrasts (needed for interpretable Type 3 tests)
options(contrasts = c("contr.sum", "contr.poly"))

# fit model on plant-based trials only: simulations ~ meat intake x label
# type; bobyqa optimizer is used because the default optimizer had
# convergence trouble for this random-effects structure
exp_sim_meat_labels <- lme4::lmer(
  rating ~
    meat_per_week_s * label_type +
    (1 + label_type | pp) +
    (1 + label_type | food),
  simulations %>% 
    filter(food_type == "plant_based"),
  control = lmerControl(optimizer = "bobyqa"))

# inspect estimates, random-effect variances, and t-values
summary(exp_sim_meat_labels)
## Linear mixed model fit by REML ['lmerMod']
## Formula: rating ~ meat_per_week_s * label_type + (1 + label_type | pp) +  
##     (1 + label_type | food)
##    Data: simulations %>% filter(food_type == "plant_based")
## Control: lmerControl(optimizer = "bobyqa")
## 
## REML criterion at convergence: 30693
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.12410 -0.69306  0.07054  0.67510  3.01615 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  pp       (Intercept) 187.726  13.701        
##           label_type1  25.675   5.067   0.11 
##  food     (Intercept)  33.916   5.824        
##           label_type1   7.776   2.789   -0.03
##  Residual             517.660  22.752        
## Number of obs: 3320, groups:  pp, 166; food, 20
## 
## Fixed effects:
##                             Estimate Std. Error t value
## (Intercept)                  53.0781     1.7270  30.734
## meat_per_week_s              -3.5850     1.1346  -3.160
## label_type1                  -3.9745     0.8363  -4.752
## meat_per_week_s:label_type1  -0.9924     0.5575  -1.780
## 
## Correlation of Fixed Effects:
##             (Intr) mt_p__ lbl_t1
## met_pr_wk_s 0.000               
## label_type1 0.015  0.000        
## mt_pr_w_:_1 0.000  0.075  0.000
# get p-values: refit with afex::mixed (Type 3, Satterthwaite df),
# again with the bobyqa optimizer for convergence
exp_sim_meat_labels_p <- mixed(
  rating ~
    meat_per_week_s * label_type +
    (1 + label_type | pp) +
    (1 + label_type | food),
  simulations %>% 
    filter(food_type == "plant_based"),
  type = 3,
  method = "S",
  control = lmerControl(optimizer = "bobyqa"))
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# ANOVA table with F-tests and p-values for the fixed effects
anova(exp_sim_meat_labels_p)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating ~ meat_per_week_s * label_type + (1 + label_type | pp) + 
## Model:     (1 + label_type | food)
## Data: %>%
## Data: simulations
## Data: filter(food_type == "plant_based")
##                            num Df  den Df       F    Pr(>F)    
## meat_per_week_s                 1 163.943  9.9842  0.001881 ** 
## label_type                      1  29.395 22.5848 4.899e-05 ***
## meat_per_week_s:label_type      1 163.236  3.1679  0.076960 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# simple slopes: slope of rating on meat_per_week_s per label type,
# and the contrast between the two slopes
emtrends(
  exp_sim_meat_labels_p,
  pairwise ~ label_type,
  var = "meat_per_week_s"
)
## $emtrends
##  label_type meat_per_week_s.trend   SE  df asymp.LCL asymp.UCL
##  control                    -4.58 1.30 Inf     -7.13    -2.027
##  enhanced                   -2.59 1.23 Inf     -5.00    -0.189
## 
## Degrees-of-freedom method: asymptotic 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast           estimate   SE  df z.ratio p.value
##  control - enhanced    -1.98 1.12 Inf -1.780  0.0751 
## 
## Degrees-of-freedom method: asymptotic
# approximate effect size: marginal (R2m) and conditional (R2c) R-squared
r.squaredGLMM(exp_sim_meat_labels)
##             R2m       R2c
## [1,] 0.03693944 0.3548545

I’d also be interested whether a feeling of hunger interacts with food type when predicting attractiveness. It does such that for meat-based foods an increase in hunger is associated with higher attractiveness, whereas the opposite is the case for plant-based foods.

# center hunger at its mean; note scale = FALSE, so despite the "_s"
# suffix hungry_s is mean-centered only, NOT standardized — the fixed
# effects below are therefore per raw unit of the hunger scale
attractiveness <-
  attractiveness %>% 
  mutate(hungry_s = scale(hungry, center = TRUE, scale = FALSE))

# plot: linear trend of attractiveness rating on centered hunger,
# drawn separately for meat-based and plant-based foods
attractiveness %>%
  ggplot(aes(x = hungry_s, y = rating)) +
  geom_smooth(aes(color = food_type), method = "lm") +
  ylim(0, 100)

# set sum-to-zero contrasts (needed for interpretable Type 3 tests)
options(contrasts = c("contr.sum", "contr.poly"))

# fit model: attractiveness ~ hunger x food type, with by-participant
# random food-type slopes and a random intercept per food item
exp_attr_hunger_food <- lme4::lmer(
  rating ~
    hungry_s * food_type +
    (1 + food_type | pp) +
    (1 | food),
  attractiveness
)

# inspect estimates, random-effect variances, and t-values
summary(exp_attr_hunger_food)
## Linear mixed model fit by REML ['lmerMod']
## Formula: rating ~ hungry_s * food_type + (1 + food_type | pp) + (1 | food)
##    Data: attractiveness
## 
## REML criterion at convergence: 62047.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2863 -0.6830  0.1065  0.6964  3.2285 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  pp       (Intercept) 130.56   11.426        
##           food_type1   47.83    6.916   -0.09
##  food     (Intercept)  40.50    6.364        
##  Residual             601.46   24.525        
## Number of obs: 6640, groups:  pp, 166; food, 40
## 
## Fixed effects:
##                      Estimate Std. Error t value
## (Intercept)         54.761595   1.374591  39.838
## hungry_s             0.001482   0.037348   0.040
## food_type1           6.569393   1.179460   5.570
## hungry_s:food_type1  0.066941   0.024541   2.728
## 
## Correlation of Fixed Effects:
##             (Intr) hngry_ fd_ty1
## hungry_s     0.000              
## food_type1  -0.028  0.000       
## hngry_s:f_1  0.000 -0.078  0.000
# get p-values: refit with afex::mixed (Type 3, Satterthwaite df)
exp_attr_hunger_food_p <- mixed(
  rating ~
    hungry_s * food_type +
    (1 + food_type | pp) +
    (1 | food),
  attractiveness,
  type = 3,
  method = "S"
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# ANOVA table with F-tests and p-values for the fixed effects
anova(exp_attr_hunger_food_p)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating ~ hungry_s * food_type + (1 + food_type | pp) + (1 | food)
## Data: attractiveness
##                    num Df  den Df       F    Pr(>F)    
## hungry_s                1 164.008  0.0016  0.968393    
## food_type               1  58.846 31.0230 6.651e-07 ***
## hungry_s:food_type      1 164.004  7.4406  0.007072 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# simple slopes: slope of rating on hungry_s per food type,
# and the contrast between the two slopes
emtrends(
  exp_attr_hunger_food_p,
  pairwise ~ food_type,
  var = "hungry_s"
)
## $emtrends
##  food_type   hungry_s.trend     SE  df asymp.LCL asymp.UCL
##  meat_based          0.0684 0.0431 Inf    -0.016    0.1528
##  plant_based        -0.0655 0.0463 Inf    -0.156    0.0252
## 
## Degrees-of-freedom method: asymptotic 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast                 estimate     SE  df z.ratio p.value
##  meat_based - plant_based    0.134 0.0491 Inf 2.728   0.0064 
## 
## Degrees-of-freedom method: asymptotic
# approximate effect size: marginal (R2m) and conditional (R2c) R-squared
r.squaredGLMM(exp_attr_hunger_food)
##             R2m       R2c
## [1,] 0.05307852 0.3057325

If indeed there is a cultural preference for meat when people are hungry (i.e., when I'm hungry I want a proper meal, and meat is a proper meal), then this could also be reflected in attitudes toward different foods. Thus, I'll inspect whether hunger predicts the attitudes for the three food types. Looking at the plot, it does not; this is backed up by the raw correlations.

# scatterplots: hunger vs. attitude score for each of the three food types
# (meat, vegan, plant-based), with one linear fit per attitude
ggplot(working_file %>% 
         
         # trait variables, therefore only one measure per participant:
         # keep the first row of each participant
         group_by(pp) %>% 
         slice(1) %>% 
         select(
           pp,
           hungry,
           attitude_meat,
           attitude_vegan,
           attitude_pb
         ) %>% 
         ungroup() %>% 
         
         # turn into long format: one row per participant x attitude type
         pivot_longer(
           cols = c(-pp, -hungry),
           names_to = c("attitude"),
           values_to = "score"
        ),
       aes(
         x = hungry,
         y = score,
         color = attitude
       )
  ) +
  geom_point() + 
  geom_smooth(method = "lm")

# raw Pearson correlations between hunger and the three attitude scores
# (trait measures, so reduce to one row per participant first)
working_file %>%
  group_by(pp) %>%
  slice(1) %>%
  ungroup() %>%
  select(hungry, attitude_meat, attitude_vegan, attitude_pb) %>%
  cor(method = "pearson")
##                     hungry attitude_meat attitude_vegan attitude_pb
## hungry          1.00000000    0.03676262    -0.09350361 -0.01455594
## attitude_meat   0.03676262    1.00000000    -0.39609840 -0.38275595
## attitude_vegan -0.09350361   -0.39609840     1.00000000  0.72984186
## attitude_pb    -0.01455594   -0.38275595     0.72984186  1.00000000

It was interesting that the effect of label type was not significant anymore after including simulations as predictor. Below I want to see whether this also happens when predicting simulations with attractiveness. These tests follow the logic of the classic Sobel test.

When simulations predict attractiveness, the direct effect of label type is greatly decreased, more than when attractiveness predicts simulations.

### sobel test simulations predicting attractiveness

# total effect: label type predicting attractiveness (path c)
label_attr <- lme4::lmer(
  attractiveness_rating_s ~
    label_type +
    (1 + label_type | pp) +
    (1 + label_type | food),
  wide_data
)

# inspect the label-type effect on attractiveness
summary(label_attr)
## Linear mixed model fit by REML ['lmerMod']
## Formula: attractiveness_rating_s ~ label_type + (1 + label_type | pp) +  
##     (1 + label_type | food)
##    Data: wide_data
## 
## REML criterion at convergence: 17437.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.9459 -0.7134  0.1018  0.7232  2.6304 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  pp       (Intercept) 0.149465 0.38661       
##           label_type1 0.006773 0.08230  -0.24
##  food     (Intercept) 0.096735 0.31102       
##           label_type1 0.005867 0.07659  0.24 
##  Residual             0.741733 0.86124       
## Number of obs: 6640, groups:  pp, 166; food, 40
## 
## Fixed effects:
##               Estimate Std. Error t value
## (Intercept)  1.409e-05  5.857e-02   0.000
## label_type1 -5.670e-02  1.730e-02  -3.278
## 
## Correlation of Fixed Effects:
##             (Intr)
## label_type1 0.098
# mediator -> outcome: simulations predicting attractiveness (path b)
sim_attr <- 
  lme4::lmer(
    attractiveness_rating_s ~
      simulation_rating_s +
      (1 + simulation_rating_s | pp) +
      (1 + simulation_rating_s | food),
    wide_data
  )

# inspect the simulation effect on attractiveness
summary(sim_attr)
## Linear mixed model fit by REML ['lmerMod']
## Formula: 
## attractiveness_rating_s ~ simulation_rating_s + (1 + simulation_rating_s |  
##     pp) + (1 + simulation_rating_s | food)
##    Data: wide_data
## 
## REML criterion at convergence: 15902.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.4019 -0.6036  0.0869  0.6651  3.3687 
## 
## Random effects:
##  Groups   Name                Variance Std.Dev. Corr
##  pp       (Intercept)         0.096516 0.31067      
##           simulation_rating_s 0.034698 0.18627  0.12
##  food     (Intercept)         0.032570 0.18047      
##           simulation_rating_s 0.001386 0.03722  0.13
##  Residual                     0.585787 0.76537      
## Number of obs: 6638, groups:  pp, 166; food, 40
## 
## Fixed effects:
##                       Estimate Std. Error t value
## (Intercept)         -0.0004402  0.0387944  -0.011
## simulation_rating_s  0.4414063  0.0197253  22.378
## 
## Correlation of Fixed Effects:
##             (Intr)
## smltn_rtng_ 0.077
# direct effect: simulations and labels jointly predicting attractiveness
# (path c'); compare the label coefficient here against label_attr above
# NOTE(review): no summary() of this model is echoed in the report —
# presumably it was inspected elsewhere; confirm before relying on it
sim_attr_label <- 
  lme4::lmer(
    attractiveness_rating_s ~
      simulation_rating_s + label_type +
      (1 + simulation_rating_s + label_type | pp) +
      (1 + simulation_rating_s + label_type | food),
    wide_data
  )

### sobel test attractiveness predicting simulations

# total effect: label type predicting simulations (path c)
label_sim <- lme4::lmer(
  simulation_rating_s ~
    label_type +
    (1 + label_type | pp) +
    (1 + label_type | food),
  wide_data
)

# inspect the label-type effect on simulations
summary(label_sim)
## Linear mixed model fit by REML ['lmerMod']
## Formula: simulation_rating_s ~ label_type + (1 + label_type | pp) + (1 +  
##     label_type | food)
##    Data: wide_data
## 
## REML criterion at convergence: 17055
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.4229 -0.6460  0.1250  0.6867  2.6660 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  pp       (Intercept) 0.187006 0.43244      
##           label_type1 0.033041 0.18177  0.24
##  food     (Intercept) 0.078604 0.28036      
##           label_type1 0.004924 0.07017  0.20
##  Residual             0.683972 0.82703      
## Number of obs: 6638, groups:  pp, 166; food, 40
## 
## Fixed effects:
##               Estimate Std. Error t value
## (Intercept)  4.181e-06  5.652e-02   0.000
## label_type1 -1.263e-01  2.062e-02  -6.125
## 
## Correlation of Fixed Effects:
##             (Intr)
## label_type1 0.182
# mediator -> outcome: attractiveness predicting simulations (path b);
# note the printed output below reports a boundary (singular) fit — the
# by-food slope variance is near zero with correlation -1.00
attr_sim <- 
  lme4::lmer(
    simulation_rating_s ~
      attractiveness_rating_s +
      (1 + attractiveness_rating_s | pp) +
      (1 + attractiveness_rating_s | food),
    wide_data
  )

# inspect the attractiveness effect on simulations
summary(attr_sim)
## Linear mixed model fit by REML ['lmerMod']
## Formula: 
## simulation_rating_s ~ attractiveness_rating_s + (1 + attractiveness_rating_s |  
##     pp) + (1 + attractiveness_rating_s | food)
##    Data: wide_data
## 
## REML criterion at convergence: 15548.2
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.9164 -0.5866  0.0989  0.6157  3.4601 
## 
## Random effects:
##  Groups   Name                    Variance  Std.Dev. Corr 
##  pp       (Intercept)             0.1351214 0.36759       
##           attractiveness_rating_s 0.0607844 0.24654  -0.28
##  food     (Intercept)             0.0196762 0.14027       
##           attractiveness_rating_s 0.0002077 0.01441  -1.00
##  Residual                         0.5467540 0.73943       
## Number of obs: 6638, groups:  pp, 166; food, 40
## 
## Fixed effects:
##                          Estimate Std. Error t value
## (Intercept)             -0.008021   0.037462  -0.214
## attractiveness_rating_s  0.437922   0.022361  19.584
## 
## Correlation of Fixed Effects:
##             (Intr)
## attrctvns__ -0.250
## convergence code: 0
## boundary (singular) fit: see ?isSingular
# direct effect: attractiveness and labels jointly predicting simulations
# (path c'); bobyqa optimizer used for convergence of the fuller
# random-effects structure
attr_sim_label <- 
  lme4::lmer(
    simulation_rating_s ~
      attractiveness_rating_s + label_type +
      (1 + attractiveness_rating_s + label_type | pp) +
      (1 + attractiveness_rating_s + label_type | food),
    wide_data,
    control = lmerControl(optimizer = "bobyqa")
  )

# inspect: label effect remains sizable after adjusting for attractiveness
summary(attr_sim_label)
## Linear mixed model fit by REML ['lmerMod']
## Formula: simulation_rating_s ~ attractiveness_rating_s + label_type +  
##     (1 + attractiveness_rating_s + label_type | pp) + (1 + attractiveness_rating_s +  
##     label_type | food)
##    Data: wide_data
## Control: lmerControl(optimizer = "bobyqa")
## 
## REML criterion at convergence: 15275.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.8800 -0.5532  0.0853  0.6029  3.5753 
## 
## Random effects:
##  Groups   Name                    Variance  Std.Dev. Corr       
##  pp       (Intercept)             0.1356493 0.36831             
##           attractiveness_rating_s 0.0616795 0.24835  -0.25      
##           label_type1             0.0281948 0.16791   0.21  0.18
##  food     (Intercept)             0.0213554 0.14613             
##           attractiveness_rating_s 0.0005867 0.02422  -0.58      
##           label_type1             0.0020754 0.04556   0.13  0.64
##  Residual                         0.5063386 0.71157             
## Number of obs: 6638, groups:  pp, 166; food, 40
## 
## Fixed effects:
##                          Estimate Std. Error t value
## (Intercept)             -0.007499   0.037980  -0.197
## attractiveness_rating_s  0.415135   0.022603  18.366
## label_type1             -0.103776   0.017337  -5.986
## 
## Correlation of Fixed Effects:
##             (Intr) attr__
## attrctvns__ -0.226       
## label_type1  0.153  0.184

I’ll run a formal mediation model on these analyses.

5. Write final files

Last, I write the final analysis files. I also save the six models from which I will extract parameters for plotting (see plots.Rmd).

# save the six fitted models whose parameters are extracted for the
# figures in plots.Rmd (paths resolved relative to the project root)
save(
  h4_attr,
  exp_attr_meat, 
  exp_sim_meat, 
  exp_attr_meat_labels, 
  exp_sim_meat_labels,
  exp_attr_hunger_food,
  file = here("plots", "models", "models.RData")
)

# final analysis file with all trials and variables
write_csv(working_file, here("data", "study3", "analysis_file.csv"))

# attractiveness trials only
write_csv(attractiveness, here("data", "study3", "attractiveness.csv"))

# simulation trials only
write_csv(simulations, here("data", "study3", "simulations.csv"))